def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear', yscale='linear',
            cmap='default', alpha=0.5, marker='.', table=True, fig=None, axes=None,
            cgrid=None, **kwargs):
    """
    Cumulative probability plot of one variable, optionally split by a legend column.

    NOTE(review): ``cumprob`` is defined more than once in this module; the last
    definition is the one Python keeps.

    :param x: str or ndarray; column name in ``data`` or the raw values
    :param data: if x is a str, this is a pd.DataFrame; when None a dataframe is
        built with ``components.create_df``
    :param legend: str or ndarray, draw one curve per unique value of this column
    :param figsize: default is (12, 6); size of the figure created when neither
        ``fig`` nor ``axes`` is supplied
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
        (skipped when ``axes`` is supplied)
    :param fig: matplotlib figure if you want to reuse the figure; ignored when
        ``axes`` is supplied
    :param axes: matplotlib axes to draw on; when supplied no legend or table axes
        are touched and the axes is returned
    :param cgrid: precomputed color grid, indexed by row position; computed with
        ``common.colors.colormap`` when None and a legend is given
    :param kwargs: accepted for call compatibility; not used by this function
    :return: the axes when ``axes`` was supplied, otherwise the matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # Rows holding NaN are kept on purpose: assigning a dropna()'d Series back to
    # the column would re-align on the index and restore them anyway, and keeping
    # every row preserves the positional alignment with ``cgrid``.
    df[x] = df[x].astype('float')

    min_, max_ = np.min(df[x]), np.max(df[x])

    # A supplied axes wins over a supplied figure (same rule as boxplot).
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        # For repeated keys the color of the last matching row is kept.
        legend_color = {key: cgrid[i] for i, key in df[legend].items()}

        if not axes:
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm,
                                     color=legend_color[group], marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
def boxplot(x, y, data=None, legend=None, marker='o', alpha=.5, points=True, cumprob=False,
            yscale='linear', cmap='default', figsize=(12, 6), orderby=None, table=True,
            fig=None, axes=None, cgrid=None, violin=False, **kwargs):
    """
    Boxplot (or violin plot) of ``y`` grouped by ``x`` with optional jittered points,
    a side cumulative-probability plot, a legend and a summary table.

    :param x: str or ndarray, grouping variable (converted to str)
    :param y: str or ndarray, plotted variable (converted to float)
    :param data: pd.Dataframe, source of data; when None a dataframe is built with
        ``components.create_df``
    :param legend: str or ndarray color code by this column
    :param marker: str, default marker to use in plots
    :param alpha: float, alpha for plots
    :param points: bool, display or not display points
    :param cumprob: bool, display cumprob plot?
    :param yscale: str, default = linear, can be log or symlog too
    :param cmap: str, matplotlib colormap
    :param figsize: tuple(int,int), figure size
    :param orderby: str, order x axis by this param
    :param table: bool, show or not show the datatable
    :param fig: matplotlib figure, if you want to re-use the figure, pass in one already created
    :param axes: matplotlib axes, if this is specified, the boxplot will be created on that axes,
        and other axes will not be created.
    :param cgrid: precomputed color grid indexed by row position; computed with
        ``common.colors.colormap`` when None and a legend is given
    :param violin: bool, draw a violin plot instead of a box plot
    :param kwargs: forwarded to both ``DataFrame.boxplot`` and ``Axes.scatter``
    :return: the axes when ``axes`` was supplied, otherwise the matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    df = data.copy()
    df = df.reset_index()
    df[x] = df[x].astype('str')
    # NaN rows are kept: a dropna()'d Series assigned back to the column is
    # re-aligned on the index, so dropping here never had any effect.
    df[y] = df[y].astype('float')

    # TODO: this doesn't really work right
    if orderby:
        # DataFrame.sort was removed from pandas; sort_values is the replacement.
        temp = df.sort_values(x)
        t = temp.groupby(x)[orderby]
        # first ``orderby`` value seen in each x group, keyed by group label
        map_of_x = col.OrderedDict()
        for mg in sorted(t.groups):
            map_of_x[mg] = t.get_group(mg).iloc[0]
        list_to_order = sorted(map_of_x.values())
        order = []
        x_to_loc = {}
        for k, v in map_of_x.items():
            idx = list_to_order.index(v)
            x_to_loc[k] = idx
            order.append(idx)

    min_, max_ = np.min(df[y]), np.max(df[y])

    # if an axis is supplied, we will not create another one
    # if a figure is supplied, we will reuse the figure
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(cumprob, legend, table, fig=fig)

    if violin:
        array = [df[df[x] == arr][y] for arr in sorted(set(df[x]))]
        axm.violinplot(array, showmedians=True)
    elif orderby:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False, positions=order,
                   fontsize=8, **kwargs)
    else:
        df.boxplot(column=y, by=x, ax=axm, showfliers=False, fontsize=8, **kwargs)

    # We need to identify all of the unique entries in the groupby column
    # NOTE(review): this is a substring test, so any label merely containing
    # "nan" (e.g. "banana") is skipped as well -- confirm this is intended.
    nonan_grps = [group for group in set(df[x]) if 'nan' not in group]

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        # if cgrid is already supplied, we will re-use that color grid
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        legend_color = {key: cgrid[i] for i, key in df[legend].items()}

        if not axes:  # skip over creation of legend if axes is provided
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

    # add all the point level data
    groups = sorted(nonan_grps)
    for j, val in enumerate(groups):
        in_group = df[x] == val
        ys = df[y][in_group]

        # create the jitters for the points, centred on the box position
        centre = x_to_loc[val] if orderby else j + 1
        xs = np.random.normal(centre, 0.05, size=len(ys))

        if points:
            # if cgrid is None, that is the standard way of creating the plot
            # cgrid is typically supplied by the jp.grid function
            if legend or cgrid is not None:
                cs = cgrid[in_group]
                axm.scatter(xs, ys.values, color=cs, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)
            else:
                axm.scatter(xs, ys.values, marker=marker, alpha=alpha,
                            linewidths=1, **kwargs)

        # skip creating the cumprob plot if the axes was supplied
        if cumprob and not axes:
            if legend:
                cs = cgrid[in_group]
                axc = components.cumprob(ys, axc, color=cs, alpha=alpha, swapxy=True)
            else:
                axc = components.cumprob(ys, axc, alpha=alpha, swapxy=True)

    # various formatting
    axm.set_ylim(min_, max_)
    axm.set_yscale(yscale)
    axm.set_ylabel(y)

    for label in axm.get_xticklabels():
        label.set_rotation(90)

    if cumprob and not axes:
        axc.set_ylim(min_, max_)
        axc.set_yscale(yscale)
        axc.set_yticklabels([], visible=False)
        for label in axc.get_xticklabels():
            label.set_rotation(90)

    if table and not axes:
        components.datatable(y, data, axt, by=x)

    axm.set_title('')

    if axes:
        return axm

    fig.suptitle('')
    return canvas.figure
def cumprob(x, data=None, legend=None, figsize=(12, 6), xscale='linear', yscale='linear',
            cmap='default', alpha=0.5, marker='.', table=True, fig=None, axes=None,
            cgrid=None, **kwargs):
    """
    Cumulative probability plot of one variable, optionally split by a legend column.

    NOTE(review): this re-defines a ``cumprob`` function that already appears
    earlier in the module; being the later definition, this is the one in effect.

    :param x: str or ndarray; column name in ``data`` or the raw values
    :param data: if x is a str, this is a pd.DataFrame; when None a dataframe is
        built with ``components.create_df``
    :param legend: str or ndarray, draw one curve per unique value of this column
    :param figsize: default is (12, 6); size of the figure created when neither
        ``fig`` nor ``axes`` is supplied
    :param xscale: default is linear, set the scale type [linear, log, symlog]
    :param yscale: default is linear, set the scale type [linear, log, symlog]
    :param cmap: colormap to use for plotting
    :param alpha: default is 0.5
    :param marker: set matplotlib marker
    :param table: bool, default is True, prints the datatable summary to the graph
        (skipped when ``axes`` is supplied)
    :param fig: matplotlib figure if you want to reuse the figure; ignored when
        ``axes`` is supplied
    :param axes: matplotlib axes to draw on; when supplied no legend or table axes
        are touched and the axes is returned
    :param cgrid: precomputed color grid, indexed by row position; computed with
        ``common.colors.colormap`` when None and a legend is given
    :param kwargs: accepted for call compatibility; not used by this function
    :return: the axes when ``axes`` was supplied, otherwise the matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, _, _, legend, _, _), data = components.create_df(x, None, legend)

    df = data.copy()
    df = df.reset_index()
    # Rows holding NaN are kept on purpose: assigning a dropna()'d Series back to
    # the column would re-align on the index and restore them anyway, and keeping
    # every row preserves the positional alignment with ``cgrid``.
    df[x] = df[x].astype('float')

    min_, max_ = np.min(df[x]), np.max(df[x])

    # A supplied axes wins over a supplied figure (same rule as boxplot).
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(None, legend, table, fig=fig)

    if table and not axes:
        axt = components.datatable(x, data, axt, by=legend)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        # For repeated keys the color of the last matching row is kept.
        legend_color = {key: cgrid[i] for i, key in df[legend].items()}

        if not axes:
            axl = components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        for group in sorted(set(df[legend])):
            axm = components.cumprob(df[df[legend] == group][x], axm,
                                     color=legend_color[group], marker=marker, alpha=alpha)
    else:
        axm = components.cumprob(df[x], axm, marker=marker, alpha=alpha)

    # various formatting
    for label in axm.get_xticklabels():
        label.set_rotation(90)

    axm.set_xlim(min_, max_)
    axm.set_xscale(xscale)
    axm.set_yscale(yscale)
    axm.set_xlabel(x)

    if axes:
        return axm

    return canvas.figure
def varchart(x: list, y: str, data: pd.DataFrame, legend=None, cumprob: bool = False,
             fig=None, **kwargs):
    """
    Variability chart: a boxplot of ``y`` grouped by the combination of several
    ``x`` columns, with vertical separator lines between the nested groups.

    NOTE(review): ``varchart`` is defined more than once in this module; the last
    definition is the one Python keeps.

    :param x: list of strings, the grouping columns from outermost to innermost
    :param y: str, the plotted column (converted to float)
    :param data: pd.Dataframe, source of data
    :param legend: str, or a list of column names, color code by this column (a
        list is concatenated into one label column)
    :param cumprob: turn on or off the cumprob plots
    :param fig: matplotlib figure to reuse, forwarded to ``boxplot``
    :param kwargs: other parameters to pass into the boxplot function (for
        example ``table`` to turn the datatable on or off)
    :return: the figure returned by ``boxplot``
    """
    local_data = data.copy()
    local_data = local_data.reset_index()
    strx = str(x)
    strl = None

    def _join_columns(columns):
        # Concatenate the given columns row-wise into "a, b, c" style labels.
        columns = list(columns)
        joined = local_data[columns[0]].map(str)
        for part in columns[1:]:
            joined = joined + ', ' + local_data[part].map(str)
        return joined

    # ensure box plot x axis is a string and y data is all float
    for var in x:
        local_data[var] = local_data[var].astype('str')
    local_data[y] = local_data[y].astype('float')

    # join all x's into a single column
    local_data[strx] = _join_columns(x)

    # create a new legend column if legend is an array
    if legend and isinstance(legend, str):
        strl = legend
    elif str(legend) == strx:
        strl = strx
    elif legend:
        # make a column that has the concatenated legend label
        strl = str(legend)
        local_data[strl] = _join_columns(legend)

    # boxplot treats fig=None as "create a new figure", so one call covers both cases
    fig = boxplot(x=strx, y=y, data=local_data, orderby=strx, legend=strl,
                  cumprob=cumprob, fig=fig, **kwargs)

    axm, axc, axl, axt = components.get_axes(fig, clear=False)
    yvals = axm.get_ylim()

    # one color per nesting level, innermost first; cycled if there are more levels
    line_colors = ['b', 'r', 'g', 'c']

    # this array holds all the multiple numbers for when to draw a line.
    # % operator is called on each of these
    # Entry k becomes the number of combined groups spanned by one value of
    # x[k]: the product of the unique counts of x[k+1:]. The product has to be
    # accumulated from the innermost column outwards (k+1, not k-1, which wraps
    # around to the last element and is only right for up to three columns).
    nummods = [len(set(local_data[var])) for var in x[1:]]
    for k in range(len(nummods) - 2, -1, -1):
        nummods[k] *= nummods[k + 1]

    # walk over every observed combination of the incoming columns
    n_groups = len(set(local_data[strx]))
    for i in range(1, n_groups + 1):
        # draw in vertical lines
        for j, mod in enumerate(reversed(nummods)):
            if not i % mod:
                axm.vlines(i - .5, *yvals, color=line_colors[j % len(line_colors)], alpha=.5)

    # vlines may autoscale the axis; restore the limits set by boxplot
    axm.set_ylim(*yvals)
    fig.suptitle('')

    return fig
def scatter(x, y, data=None, legend=None, marker='o', alpha=.5, xscale='linear',
            yscale='linear', cmap='default', figsize=(12, 6), fit=None, fitparams=None,
            table=True, fig=None, axes=None, cgrid=None, **kwargs):
    """
    Scatter plots with regression lines

    :param x: str or ndarray
    :param y: str or ndarray
    :param data: pandas.Dataframe; when None a dataframe is built with
        ``components.create_df``
    :param legend: str or ndarray, color/fit by this column
    :param marker: matplotlib marker style
    :param alpha: float, matplotlib alpha
    :param xscale: default == linear, any of matplotlib scale types
    :param yscale: default == linear, any of matplotlib scale types
    :param cmap: any of matplotlib cmaps
    :param figsize: default == (12, 6); used when a new figure is created
    :param fit: [linear, quadratic, smooth, interpolate]
    :param fitparams: params to pass to fitting function
    :param table: show the regression table (only for linear/quadratic fits and
        only when ``axes`` is not supplied)
    :param fig: matplotlib figure to reuse; ignored when ``axes`` is supplied
    :param axes: matplotlib axes to draw on; when supplied no legend or table
        axes are touched and the axes is returned
    :param cgrid: precomputed color grid indexed by row position; computed with
        ``common.colors.colormap`` when None and a legend is given
    :param kwargs: forwarded to ``Axes.scatter``
    :return: the axes when ``axes`` was supplied, otherwise the matplotlib figure
    """
    # if no dataframe is supplied, create one
    if data is None:
        (x, y, _, legend, _, _), data = components.create_df(x, y, legend)

    if not fitparams:
        fitparams = {}

    df = data.copy()
    df = df[[i for i in (x, y, legend) if i]]
    # many of the fitting routines don't work with nan or non-sorted data.
    # NaN rows are dropped here; sorting is applied to the frame handed to the
    # fit routine below, so the row order used for color lookup stays untouched.
    df = df.dropna()
    df = df.reset_index()

    # fit axis is for the regression equations
    makefitaxis = fit in ('linear', 'quadratic')

    # A supplied axes wins over a supplied figure (same rule as boxplot).
    if fig and not axes:
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.get_axes(fig)
    elif axes:
        axm = axes
    else:
        fig = mpl.figure.Figure(figsize=figsize, tight_layout=True)
        canvas = mbb.FigureCanvasAgg(fig)
        axm, axc, axl, axt = components.create_axes(False, legend, table and makefitaxis, fig=fig)

    if legend:
        # colormap is supposed to be the goto function to get all colormaps
        # should return a colorgrid that maps each point to a set of colors
        if cgrid is None:
            cgrid = common.colors.colormap(df[legend], kind='discrete', cmap=cmap)

        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        legend_color = {key: cgrid[i] for i, key in df[legend].items()}

        # if the axis is supplied, we do not want to create a legend axis
        if not axes:
            components.legend(sorted(legend_color.items()), axl)
            axl.set_title(legend, loc='left')

        text = ''
        for group in sorted(set(df[legend])):
            t = df[df[legend] == group]
            axm.scatter(x=t[x], y=t[y], c=legend_color[group], marker=marker, alpha=alpha, **kwargs)
            if fit:
                xs, ys, fn = _get_fit(x, y, t.sort_values(x), fit, fitparams)
                axm.plot(xs, ys, c=legend_color[group])
                if makefitaxis and table:
                    text += '${}: {}$\n'.format(str(group).strip(), fn)

        if makefitaxis and table and not axes:
            components.regressiontable(text, axt, fig)
            axt.axis('off')
    else:
        axm.scatter(x=df[x], y=df[y], marker=marker, alpha=alpha, **kwargs)
        if fit:
            xs, ys, fn = _get_fit(x, y, df.sort_values(x), fit, fitparams)
            axm.plot(xs, ys)
            # no table axes exists when the caller supplied its own axes
            if makefitaxis and table and not axes:
                components.regressiontable('{}'.format(fn), axt, fig)

    axm.set_xlim(np.min(df[x]), np.max(df[x]))
    axm.set_ylim(np.min(df[y]), np.max(df[y]))
    axm.set_yscale(yscale)
    axm.set_xscale(xscale)
    axm.set_xlabel(x)
    axm.set_ylabel(y)

    if axes:
        return axm

    return canvas.figure
def varchart(x: list, y: str, data: pd.DataFrame, legend=None, cumprob: bool = False,
             fig=None, **kwargs):
    """
    Variability chart: a boxplot of ``y`` grouped by the combination of several
    ``x`` columns, with vertical separator lines between the nested groups.

    NOTE(review): this re-defines a ``varchart`` function that already appears
    earlier in the module; being the later definition, this is the one in effect.

    :param x: list of strings, the grouping columns from outermost to innermost
    :param y: str, the plotted column (converted to float)
    :param data: pd.Dataframe, source of data
    :param legend: str, or a list of column names, color code by this column (a
        list is concatenated into one label column)
    :param cumprob: turn on or off the cumprob plots
    :param fig: matplotlib figure to reuse, forwarded to ``boxplot``
    :param kwargs: other parameters to pass into the boxplot function (for
        example ``table`` to turn the datatable on or off)
    :return: the figure returned by ``boxplot``
    """
    local_data = data.copy()
    local_data = local_data.reset_index()
    strx = str(x)
    strl = None

    def _join_columns(columns):
        # Concatenate the given columns row-wise into "a, b, c" style labels.
        columns = list(columns)
        joined = local_data[columns[0]].map(str)
        for part in columns[1:]:
            joined = joined + ', ' + local_data[part].map(str)
        return joined

    # ensure box plot x axis is a string and y data is all float
    for var in x:
        local_data[var] = local_data[var].astype('str')
    local_data[y] = local_data[y].astype('float')

    # join all x's into a single column
    local_data[strx] = _join_columns(x)

    # create a new legend column if legend is an array
    if legend and isinstance(legend, str):
        strl = legend
    elif str(legend) == strx:
        strl = strx
    elif legend:
        # make a column that has the concatenated legend label
        strl = str(legend)
        local_data[strl] = _join_columns(legend)

    # boxplot treats fig=None as "create a new figure", so one call covers both cases
    fig = boxplot(x=strx, y=y, data=local_data, orderby=strx, legend=strl,
                  cumprob=cumprob, fig=fig, **kwargs)

    axm, axc, axl, axt = components.get_axes(fig, clear=False)
    yvals = axm.get_ylim()

    # one color per nesting level, innermost first; cycled if there are more levels
    line_colors = ['b', 'r', 'g', 'c']

    # this array holds all the multiple numbers for when to draw a line.
    # % operator is called on each of these
    # Entry k becomes the number of combined groups spanned by one value of
    # x[k]: the product of the unique counts of x[k+1:]. The product has to be
    # accumulated from the innermost column outwards (k+1, not k-1, which wraps
    # around to the last element and is only right for up to three columns).
    nummods = [len(set(local_data[var])) for var in x[1:]]
    for k in range(len(nummods) - 2, -1, -1):
        nummods[k] *= nummods[k + 1]

    # walk over every observed combination of the incoming columns
    n_groups = len(set(local_data[strx]))
    for i in range(1, n_groups + 1):
        # draw in vertical lines
        for j, mod in enumerate(reversed(nummods)):
            if not i % mod:
                axm.vlines(i - .5, *yvals, color=line_colors[j % len(line_colors)], alpha=.5)

    # vlines may autoscale the axis; restore the limits set by boxplot
    axm.set_ylim(*yvals)
    fig.suptitle('')

    return fig