def boxplot(x, y, data=None, legend=None, marker='o', alpha=.5,
            points=True, cumprob=False, yscale='linear', cmap='default',
            figsize=(12, 6), orderby=None, table=True, fig=None, axes=None,
            cgrid=None, violin=False, **kwargs):
    """
    Draw a box plot (or violin plot) of ``y`` grouped by ``x``.

    Jittered point-level data can be overlaid on the boxes, and a side
    cumulative-probability plot, a legend and a data table can be added
    when the function owns the figure layout.

    :param x: str or ndarray, grouping column (converted to str)
    :param y: str or ndarray, value column (converted to float)
    :param data: pd.Dataframe, source of data; when None, one is built
                 from x, y and legend via components.create_df
    :param legend: str or ndarray, color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param table: bool, show or not show the datatable
    :param fig: matplotlib figure, if you want to re-use the figure, pass
                in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be
                 created on that axes, and other axes (legend, cumprob,
                 table) will not be created.
    :param cgrid: optional pre-computed color grid, indexed per row of the
                  data; when None and legend is set it is computed with
                  common.colors.colormap
    :param violin: bool, draw a violin plot instead of a box plot
    :param kwargs: forwarded to DataFrame.boxplot and to Axes.scatter
    :return: the supplied axes when ``axes`` is given, otherwise the
             matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    # NOTE: the dropna() result is aligned back onto df's index on
    # assignment, so rows with missing values stay in df as NaN.
    df[y] = df[y].astype('float').dropna()

    # TODO: this doesn't really work right
    if orderby:
        # DataFrame.sort no longer exists in pandas; sort_values replaces it.
        temp = df.sort_values(x)
        t = temp.groupby(x)[orderby]
        # first orderby value seen for every x group, keyed in sorted order
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            map_of_x[mg] = t.get_group(mg).iloc[0]

        list_to_order = sorted(map_of_x.values())
        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob, legend, table,
                                                    fig=fig)

    if violin:
        array = [df[df[x] == arr][y] for arr in sorted(set(df[x]))]
        axm.violinplot(array, showmedians=True)
    elif orderby:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False,
                   positions=order, fontsize=8, **kwargs)
    else:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False,
                   fontsize=8, **kwargs)

    # We need to identify all of the unique entries in the groupby column,
    # leaving out any group whose label contains 'nan'
    unique_groups = set(df[x])
    nonan_grps = [group for group in unique_groups if 'nan' not in group]

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete',
                                           cmap=cmap)

        legend_color = {}
        # Series.iteritems was removed in pandas 2.0; items() is equivalent.
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

    # add all the point level data
    groups = sorted(nonan_grps)
    for j, val in enumerate(groups):
        in_group = df[x] == val
        ys = df[y][in_group]

        # create the jitters for the points, centered on the box position
        center = x_to_loc[val] if orderby else j + 1
        xs = np.random.normal(center, 0.05, size=len(ys))

        if points:
            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[in_group]
                axm.scatter(xs, ys.values, color=cs, marker=marker,
                            alpha=alpha, linewidths=1, **kwargs)
            else:
                axm.scatter(xs, ys.values, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[in_group]
                axc = components.cumprob(ys, axc, color=cs, alpha=alpha,
                                         swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formating
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)
        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear',
            yscale='linear', cmap='default', alpha=0.5, marker='.',
            table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Draw a cumulative probability plot of ``x``, optionally split by legend.

    NOTE: a second definition of ``cumprob`` with the same signature appears
    later in this file and rebinds the name when the module is loaded.

    :param x: str or ndarray
    :param data: if x is a str, this is a pd.Dataframe; when None, one is
                 built from x and legend via components.create_df
    :param legend: str or ndarray, one curve is drawn per unique value
    :param figsize: default is (12, 6); sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the
                  graph
    :param fig: matplotlib figure if you want to reuse the figure.
    :param axes: matplotlib axes; when given, the plot is drawn on it and no
                 legend or table axes are populated. Takes precedence over
                 ``fig``.
    :param cgrid: optional pre-computed color grid, indexed per row of the
                  data; when None and legend is set it is computed with
                  common.colors.colormap
    :param kwargs: accepted for signature compatibility; not used here
    :return: the supplied axes when ``axes`` is given, otherwise the
             matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # NOTE: the dropna() result is aligned back onto df's index on
    # assignment, so rows with missing values stay in df as NaN.
    df[x] = df[x].astype('float').dropna()
    min_, max_ = np.min(df[x]), np.max(df[x])

    # A supplied axes wins over a supplied figure, so the caller's axes is
    # the one drawn on and returned.
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table,
                                                    fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete',
                                           cmap=cmap)

        legend_color = {}
        # Series.iteritems was removed in pandas 2.0; items() is equivalent.
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        # one curve per legend value, colored from the legend mapping
        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm,
                                     color=legend_color[group],
                                     marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formating
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear',
            yscale='linear', cmap='default', alpha=0.5, marker='.',
            table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Draw a cumulative probability plot of ``x``, optionally split by legend.

    NOTE: this definition follows an earlier ``cumprob`` with the same
    signature in this file; being the later one, it is the definition the
    module-level name ends up bound to.

    :param x: str or ndarray
    :param data: if x is a str, this is a pd.Dataframe; when None, one is
                 built from x and legend via components.create_df
    :param legend: str or ndarray, one curve is drawn per unique value
    :param figsize: default is (12, 6); sets the figure size
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the
                  graph
    :param fig: matplotlib figure if you want to reuse the figure.
    :param axes: matplotlib axes; when given, the plot is drawn on it and no
                 legend or table axes are populated. Takes precedence over
                 ``fig``.
    :param cgrid: optional pre-computed color grid, indexed per row of the
                  data; when None and legend is set it is computed with
                  common.colors.colormap
    :param kwargs: accepted for signature compatibility; not used here
    :return: the supplied axes when ``axes`` is given, otherwise the
             matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # NOTE: the dropna() result is aligned back onto df's index on
    # assignment, so rows with missing values stay in df as NaN.
    df[x] = df[x].astype('float').dropna()
    min_, max_ = np.min(df[x]), np.max(df[x])

    # A supplied axes wins over a supplied figure, so the caller's axes is
    # the one drawn on and returned.
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table,
                                                    fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete',
                                           cmap=cmap)

        legend_color = {}
        # Series.iteritems was removed in pandas 2.0; items() is equivalent.
        for i, key in df[legend].items():
            legend_color[key] = cgrid[i]

        if not axes:
            axl = components.legend(sorted(list(legend_color.items())), axl)
            axl.set_title(legend, loc='left')

        # one curve per legend value, colored from the legend mapping
        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm,
                                     color=legend_color[group],
                                     marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formating
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure