def kde_plot(self, x, color, x_units="f", y_units="f", shade=False, line_width=0.25, bw=1.0, ax=None):
    """
    Documentation:

        ---
        Description:
            Create kernel density curve for a feature.

        ---
        Parameters:
            x : array
                Data for plotting.
            color : str (some sort of color code)
                Color of KDE lines.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            shade : boolean, default=False
                Controls whether area under KDE curve is shaded.
            line_width : float or int, default=0.25
                Controls thickness of KDE lines. Multiplied by self.chart_scale.
            bw : float, default=1.0
                Scaling factor for the KDE curve.
                NOTE(review): accepted for interface compatibility but not forwarded to
                sns.kdeplot by this implementation, so it currently has no effect.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # create kernel density estimation line
    sns.kdeplot(
        data=x,
        shade=shade,
        color=color,
        legend=None,
        linewidth=self.chart_scale * line_width,
        ax=ax,
    )

    # restyle the existing tick labels. the labels are passed through unchanged: multiplying
    # the label list by 100 only repeats the list (it does not scale values), and a label
    # count that differs from the tick count is rejected by matplotlib when ticks are fixed
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units)
def bar_h(self, y, counts, color=style.style_grey, label_rotate=45, x_units="f", alpha=0.8, ax=None):
    """
    Documentation:

        ---
        Description:
            Create horizontal bar plot.

        ---
        Parameters:
            y : array
                1-dimensional array of values to plot on y-axis representing distinct categories.
                Also used as the y-axis tick labels.
            counts : array
                1-dimensional array of value counts for categories. Used as the bar widths.
            color : str (some sort of color code), default=style.style_grey
                Bar color.
            label_rotate : float or int, default=45
                Number of degrees to rotate the x-tick labels.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places.
            alpha : float, default=0.8
                Controls transparency of bars. Accepts value between 0.0 and 1.0.
            ax : axes object, default=None
                Axis on which to place visual. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # draw on the requested axes rather than on whatever pyplot considers current
    ax.barh(y=y, width=counts, color=color, tick_label=y, alpha=alpha)

    # customize x-tick labels. rotation is applied here so that label_rotate is honored
    # instead of being reset to 0, and the labels are passed through unchanged because
    # multiplying the label list by 100 only repeats the list
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=label_rotate,
        fontsize=0.9 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units)
def reg_plot(self, x, y, data, dot_color=style.style_grey, dot_size=2.0, line_color=style.style_blue,
             line_width=0.3, x_jitter=None, x_units="f", y_units="f", x_rotate=None, alpha=0.3, ax=None):
    """
    Documentation:

        ---
        Description:
            Create scatter plot with regression line.

        ---
        Parameters:
            x : str
                Name of independent variable in dataframe.
            y : str
                Name of numeric target variable.
            data : Pandas DataFrame
                Pandas DataFrame including both x and y columns.
            dot_color : str, default=style.style_grey
                Color of scattered dots.
            dot_size : float or int, default=2.0
                Size of scattered dots. Multiplied by self.chart_scale.
            line_color : str, default=style.style_blue
                Regression line color.
            line_width : float or int, default=0.3
                Regression line width. Multiplied by self.chart_scale.
            x_jitter : float, default=None
                Optional parameter for randomly displacing dots along the x-axis to enable easier
                visibility of individual dots.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            x_rotate : int, default=None
                Rotates x-axis tick mark labels x degrees.
            alpha : float, default=0.3
                Controls transparency of scattered dots. Accepts value between 0.0 and 1.0.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # create regression plot and clear the axis labels seaborn adds
    sns.regplot(
        x=x,
        y=y,
        data=data,
        x_jitter=x_jitter,
        scatter_kws={
            "alpha": alpha,
            "color": dot_color,
            "s": dot_size * self.chart_scale,
        },
        line_kws={
            "color": line_color,
            "linewidth": self.chart_scale * line_width,
        },
        ax=ax,
    ).set(xlabel=None, ylabel=None)

    # restyle the existing tick labels. the labels are passed through unchanged: multiplying
    # the label list by 100 only repeats the list (it does not scale values)
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(
        ax=ax, x_units=x_units, y_units=y_units, x_rotate=x_rotate
    )
def dist_plot(self, x, color, x_units="f", y_units="f", fit=None, kde=False, x_rotate=None, alpha=0.8,
              bbox=(1.2, 0.9), legend_labels=None, color_map="viridis", ax=None):
    """
    Documentation:

        ---
        Description:
            Creates distribution plot for numeric variable. Optionally overlays a kernel density
            estimation curve.

        ---
        Parameters:
            x : array
                Data for plotting.
            color : str (some sort of color code)
                Color of bars and KDE lines.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            fit : random variable object, default=None
                Allows for the addition of another curve. Utilizing 'norm' overlays a normal
                distribution over the distribution bar chart. Useful for seeing how well, or not,
                the distribution tracks with a specified distribution.
            kde : boolean, default=False
                Controls whether kernel density is plotted over distribution.
            x_rotate : int, default=None
                Rotates x-axis tick mark labels x degrees.
            alpha : float, default=0.8
                Controls transparency of objects. Accepts value between 0.0 and 1.0.
            bbox : tuple of floats, default=(1.2, 0.9)
                Coordinates for determining legend position.
            legend_labels : list, default=None
                Custom legend labels. When None, no legend is drawn.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map used to generate the legend patch colors.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # create distribution plot with an optional fit curve
    sns.distplot(
        a=x,
        kde=kde,
        color=color,
        axlabel=False,
        fit=fit,
        kde_kws={"lw": 0.2 * self.chart_scale},
        hist_kws={"alpha": alpha},
        ax=ax,
    )

    # tick label font size
    ax.tick_params(axis="both", colors=style.style_grey, labelsize=1.2 * self.chart_scale)

    # restyle the existing tick labels. the labels are passed through unchanged: multiplying
    # the label list by 100 only repeats the list (it does not scale values)
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(
        ax=ax, x_units=x_units, y_units=y_units, x_rotate=x_rotate
    )

    ## create custom legend
    # a legend is only meaningful when labels are supplied; without this guard the default
    # legend_labels=None would fail on len(None)
    if legend_labels is not None:
        legend_labels = np.array(legend_labels)

        # generate one color per label and pair them up
        color_list = style.color_gen(color_map, num=len(legend_labels))
        label_color = dict(zip(legend_labels, color_list))

        # create legend Patches
        patches = [Patch(color=v, label=k, alpha=alpha) for k, v in label_color.items()]

        # draw legend on the axes being decorated
        leg = ax.legend(
            handles=patches,
            fontsize=1.0 * self.chart_scale,
            loc="upper right",
            markerscale=0.5 * self.chart_scale,
            ncol=1,
            bbox_to_anchor=bbox,
        )

        # label font color
        for text in leg.get_texts():
            plt.setp(text, color="grey")
def scatter_2d(self, x, y, df=None, x_units="f", x_ticks=None, y_units="f", y_ticks=None, plot_buffer=True,
               size=5, axis_limits=True, color=style.style_grey, facecolor="w", alpha=0.8, x_rotate=None,
               ax=None):
    """
    Documentation:

        ---
        Description:
            Create 2-dimensional scatter plot.

        ---
        Parameters:
            x : array or string
                Either 1-dimensional array of values or a column name in a Pandas DataFrame.
            y : array or string
                Either 1-dimensional array of values or a column name in a Pandas DataFrame.
            df : Pandas DataFrame, default=None
                Pandas DataFrame containing data to plot. Can be any size - plotted columns will be
                chosen by columns names specified in x and y parameters.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            x_ticks : array, default=None
                Custom x-tick positions.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places. When it contains 'p', the y values are multiplied
                by 100 before plotting.
            y_ticks : array, default=None
                Custom y-tick positions.
            plot_buffer : bool, default=True
                Controls whether dynamic plot buffer function is executed.
            size : int or float, default=5
                Size of scattered dots. Multiplied by self.chart_scale.
            axis_limits : bool, default=True
                Controls whether dynamic axis limit setting function is executed.
            color : str (color code of some sort), default=style.style_grey
                Color of scattered dots.
            facecolor : str (color code of some sort), default='w'
                Face color of scattered dots.
            alpha : float, default=0.8
                Controls transparency of objects. Accepts value between 0.0 and 1.0.
            x_rotate : int, default=None
                Rotates x-axis tick mark labels x degrees.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # if a Pandas DataFrame is passed, pull the x and y columns out of it by name;
    # otherwise x and y are expected to already be arrays. either way reshape to columns
    if df is not None:
        x = df[x].values.reshape(-1, 1)
        y = df[y].values.reshape(-1, 1)
    else:
        x = x.reshape(-1, 1)
        y = y.reshape(-1, 1)

    # percent-denominated y data is plotted on a 0-100 scale. scaling once up front keeps
    # the axis limits computed below consistent with the points actually drawn
    if "p" in y_units:
        y = y * 100

    # plot 2-dimensional scatter on the requested axes
    ax.scatter(
        x=x,
        y=y,
        color=color,
        s=size * self.chart_scale,
        alpha=alpha,
        facecolor=facecolor,
        linewidth=0.167 * self.chart_scale,
    )

    # optionally set axis lower / upper limits
    if axis_limits:
        x_min, x_max, y_min, y_max = util.util_set_axes(x=x, y=y)
        ax.axis([x_min, x_max, y_min, y_max])

    # optionally create smaller buffer around plot area to prevent cutting off elements
    if plot_buffer:
        util.util_plot_buffer(ax=ax, x=0.02, y=0.02)

    # optionally set custom x-tick positions
    if x_ticks is not None:
        ax.set_xticks(x_ticks)

    # optionally set custom y-tick positions
    if y_ticks is not None:
        ax.set_yticks(y_ticks)

    # restyle the existing tick labels. the labels are passed through unchanged: multiplying
    # the label list by 100 only repeats the list, and a label count that differs from the
    # number of fixed ticks is rejected by matplotlib
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.0 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.0 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units, x_rotate=x_rotate)
def scatter_2d_hue(self, x, y, target, label, df=None, x_units="f", x_ticks=None, y_units="f", y_ticks=None,
                   plot_buffer=True, size=10, axis_limits=True, color=style.style_grey, facecolor="w",
                   bbox=(1.2, 0.9), color_map="viridis", alpha=0.8, x_rotate=None, ax=None):
    """
    Documentation:

        ---
        Description:
            Create 2-dimensional scatter plot with a third dimension represented as a color hue in
            the scatter dots.

        ---
        Parameters:
            x : array or string
                Either 1-dimensional array of values or a column name in a Pandas DataFrame.
            y : array or string
                Either 1-dimensional array of values or a column name in a Pandas DataFrame.
            target : array or string
                Either 1-dimensional array of values or a column name in a Pandas DataFrame.
            label : list
                Labels corresponding to color hue, paired in order with the sorted unique target
                values. When None, the dots are drawn without labels and no legend is added.
            df : Pandas DataFrame, default=None
                Pandas DataFrame containing data to plot. Can be any size - plotted columns will be
                chosen by columns names specified in x, y and target parameters.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            x_ticks : array, default=None
                Custom x-tick positions.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            y_ticks : array, default=None
                Custom y-tick positions.
            plot_buffer : bool, default=True
                Controls whether dynamic plot buffer function is executed.
            size : int or float, default=10
                Size of scattered dots. Multiplied by self.chart_scale.
            axis_limits : bool, default=True
                Controls whether dynamic axis limit setting function is executed.
            color : str (color code of some sort), default=style.style_grey
                NOTE(review): not used by this implementation; dot colors are generated from
                color_map, one per unique target value.
            facecolor : str (color code of some sort), default='w'
                Face color of scattered dots.
            bbox : tuple of floats, default=(1.2, 0.9)
                Coordinates for determining legend position.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
            alpha : float, default=0.8
                Controls transparency of objects. Accepts value between 0.0 and 1.0.
            x_rotate : int, default=None
                Rotates x-axis tick mark labels x degrees.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # build a single (n, 3) array of x, y and target under its own name, so that x and y
    # stay available as 1-dimensional inputs for the axis limit calculation below
    if df is not None:
        points = df[[x, y, target]].values
        x = df[x].values
        y = df[y].values
    else:
        points = np.c_[x, y, target]

    # unique target values
    target_ids = np.unique(points[:, 2])

    # generate color list
    color_list = style.color_gen(name=color_map, num=len(target_ids))

    # without labels, still draw every hue group but leave the dots unlabeled
    hue_names = label if label is not None else [None] * len(target_ids)

    # loop through sets of target values, labels and colors to create 2-d scatter with hue
    for target_id, target_name, dot_color in zip(target_ids, hue_names, color_list):
        subset = points[points[:, 2] == target_id]
        ax.scatter(
            x=subset[:, 0],
            y=subset[:, 1],
            color=dot_color,
            label=target_name,
            s=size * self.chart_scale,
            alpha=alpha,
            facecolor=facecolor,
            linewidth=0.234 * self.chart_scale,
        )

    # add legend to figure
    if label is not None:
        ax.legend(
            loc="upper right",
            bbox_to_anchor=bbox,
            ncol=1,
            frameon=True,
            fontsize=1.1 * self.chart_scale,
        )

    # optionally set axis lower / upper limits
    if axis_limits:
        x_min, x_max, y_min, y_max = util.util_set_axes(x=x, y=y)
        ax.axis([x_min, x_max, y_min, y_max])

    # optionally create smaller buffer around plot area to prevent cutting off elements
    if plot_buffer:
        util.util_plot_buffer(ax=ax, x=0.02, y=0.02)

    # optionally set custom x-tick positions
    if x_ticks is not None:
        ax.set_xticks(x_ticks)

    # optionally set custom y-tick positions
    if y_ticks is not None:
        ax.set_yticks(y_ticks)

    # restyle the existing tick labels. the labels are passed through unchanged: multiplying
    # the label list by 100 only repeats the list, and a label count that differs from the
    # number of fixed ticks is rejected by matplotlib
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.0 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.0 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units, x_rotate=x_rotate)
def facet_cat_num_scatter(self, df, x, y, cat_row=None, cat_col=None, split=None, bbox=None, aspect=1,
                          alpha=0.8, height=4, legend_labels=None, x_units="f", y_units="f",
                          color_map="viridis"):
    """
    Documentation:

        ---
        Description:
            Creates scatter plots of two numeric variables and allows for faceting by up to two
            categorical variables along the column and/or row axes of the figure.

        ---
        Parameters:
            df : Pandas DataFrame
                Pandas DataFrame containing data for plotting.
            x : str
                Numeric variable to plot along x-axis.
            y : str
                Numeric variable to plot along y-axis.
            cat_row : str, default=None
                Categorical variable faceted along the row axis.
            cat_col : str, default=None
                Categorical variable faceted along the column axis.
            split : str, default=None
                Categorical variable used as the color hue within each facet. When None, no hue
                palette is applied and no legend is drawn.
            bbox : tuple of floats, default=None
                Coordinates for determining legend position.
            aspect : float, default=1
                Higher values create wider plot, lower values create narrow plot, while keeping
                height constant.
            alpha : float, default=0.8
                Controls transparency of the legend patches. Accepts value between 0.0 and 1.0.
            height : float, default=4
                Height in inches of each facet.
            legend_labels : list, default=None
                Custom legend labels. When None, the sorted unique non-null values of the split
                column are used.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 'f' displays float. 'p'
                displays percentages, 'd' displays dollars. Repeat character (e.g. 'ff' or 'ddd')
                for additional decimal places.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
    """
    # hue palette and ordering only exist when a split column is given; looking up
    # df[split] with split=None would raise
    if split is not None:
        palette = sns.color_palette(
            style.color_gen(color_map, num=len(np.unique(df[split].values)))
        )
        hue_order = df[split].sort_values().drop_duplicates().values.tolist()
    else:
        palette = None
        hue_order = None

    # create FacetGrid object
    g = sns.FacetGrid(
        df,
        col=cat_col,
        row=cat_row,
        hue=split,
        palette=palette,
        hue_order=hue_order,
        height=height,
        aspect=aspect,
        margin_titles=True,
    )

    # map scatter plot to FacetGrid object
    g = g.map(plt.scatter, x, y, s=1.2 * self.chart_scale)

    # format x and y ticklabels, x and y labels, and main title
    for ax in g.axes.flat:
        # labels are passed through unchanged: multiplying the label list by 100 only
        # repeats the list (it does not scale values)
        ax.set_yticklabels(
            ax.get_yticklabels(),
            rotation=0,
            fontsize=0.8 * self.chart_scale,
            color=style.style_grey,
        )
        ax.set_xticklabels(
            ax.get_xticklabels(),
            rotation=0,
            fontsize=0.8 * self.chart_scale,
            color=style.style_grey,
        )
        ax.set_ylabel(
            ax.get_ylabel(),
            rotation=90,
            fontsize=1.05 * self.chart_scale,
            color=style.style_grey,
        )
        ax.set_xlabel(
            ax.get_xlabel(),
            rotation=0,
            fontsize=1.05 * self.chart_scale,
            color=style.style_grey,
        )
        ax.set_title(
            ax.get_title(),
            rotation=0,
            fontsize=1.05 * self.chart_scale,
            color=style.style_grey,
        )

        # custom tick label formatting
        util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units)

        if ax.texts:
            # this contains the right ylabel text; redraw it with the chart styling
            txt = ax.texts[0]
            x_pos, y_pos = txt.get_unitless_position()
            ax.text(
                x_pos,
                y_pos,
                txt.get_text(),
                transform=ax.transAxes,
                va="center",
                fontsize=1.05 * self.chart_scale,
                color=style.style_grey,
                rotation=-90,
            )

            # remove the original text
            txt.remove()

    ## create custom legend
    # the legend describes the hue levels, so it is only built when a split column exists
    if split is not None:
        # create labels
        if legend_labels is None:
            legend_labels = (
                df[df[split].notnull()][split]
                .sort_values()
                .drop_duplicates()
                .values.tolist()
            )
        else:
            legend_labels = np.array(legend_labels)

        # generate one color per label and pair them up
        color_list = style.color_gen(color_map, num=len(legend_labels))
        label_color = dict(zip(legend_labels, color_list))

        # create legend Patches
        patches = [
            Patch(color=v, label=k, alpha=alpha) for k, v in label_color.items()
        ]

        # draw legend
        leg = plt.legend(
            handles=patches,
            fontsize=1.0 * self.chart_scale,
            loc="upper right",
            markerscale=0.5 * self.chart_scale,
            ncol=1,
            bbox_to_anchor=bbox,
        )

        # label font color
        for text in leg.get_texts():
            plt.setp(text, color="grey")
def facet_two_cat_bar(self, df, x, y, split, x_units=None, y_units=None, bbox=None, alpha=0.8,
                      legend_labels=None, filter_nan=True, color_map="viridis", ax=None):
    """
    Documentation:

        ---
        Description:
            Creates a series of bar plots that count a variable along the y-axis and separate the
            counts into bins based on two category variables.

        ---
        Parameters:
            df : Pandas DataFrame
                Pandas DataFrame containing data for plotting.
            x : str
                Categorical variable to plot along x-axis.
            y : str
                Variable to be counted along y-axis.
            split : str
                Categorical variable used as the color hue within each x category. When None,
                no hue palette is applied and no legend is drawn.
            x_units : str, default=None
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places.
            y_units : str, default=None
                Determines unit of measurement for y-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places.
            bbox : tuple of floats, default=None
                Coordinates for determining legend position.
            alpha : float, default=0.8
                Controls transparency of the legend patches. Accepts value between 0.0 and 1.0.
            legend_labels : list, default=None
                Custom legend labels. When None, the sorted unique non-null values of the split
                column are used.
            filter_nan : bool, default=True
                Remove records that have a null value in the column specified by the 'x' parameter.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # remove nans from x column
    if filter_nan:
        df = df.dropna(subset=[x])

    # hue palette and ordering only exist when a split column is given. the palette follows
    # the color_map argument so the bars match the legend built below
    if split is not None:
        palette = sns.color_palette(
            style.color_gen(color_map, num=len(np.unique(df[split].values)))
        )
        hue_order = df[split].sort_values().drop_duplicates().values.tolist()
    else:
        palette = None
        hue_order = None

    # create bar plot
    sns.barplot(
        x=x,
        y=y,
        hue=split,
        data=df,
        palette=palette,
        order=df[x].sort_values().drop_duplicates().values.tolist(),
        hue_order=hue_order,
        ax=ax,
        ci=None,
    )

    # format x-tick labels
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.05 * self.chart_scale,
        color=style.style_grey,
    )

    # format y-tick labels. labels are passed through unchanged: multiplying the label list
    # by 100 only repeats the list, and testing "p" in y_units fails for the default None
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.05 * self.chart_scale,
        color=style.style_grey,
    )

    # format x-axis label
    ax.set_xlabel(
        ax.get_xlabel(),
        rotation=0,
        fontsize=1.35 * self.chart_scale,
        color=style.style_grey,
    )

    # format y-axis label
    ax.set_ylabel(
        ax.get_ylabel(),
        rotation=90,
        fontsize=1.35 * self.chart_scale,
        color=style.style_grey,
    )

    # format title
    ax.set_title(
        ax.get_title(),
        rotation=0,
        fontsize=1.5 * self.chart_scale,
        color=style.style_grey,
    )

    ## create custom legend
    # the legend describes the hue levels, so it is only built when a split column exists
    if split is not None:
        # create labels
        if legend_labels is None:
            legend_labels = (
                df[df[split].notnull()][split]
                .sort_values()
                .drop_duplicates()
                .values.tolist()
            )
        else:
            legend_labels = np.array(legend_labels)

        # generate one color per label and pair them up
        color_list = style.color_gen(color_map, num=len(legend_labels))
        label_color = dict(zip(legend_labels, color_list))

        # create legend Patches
        patches = [
            Patch(color=v, label=k, alpha=alpha) for k, v in label_color.items()
        ]

        # draw legend on the axes being decorated
        leg = ax.legend(
            handles=patches,
            fontsize=1.25 * self.chart_scale,
            loc="upper right",
            markerscale=0.5 * self.chart_scale,
            ncol=1,
            bbox_to_anchor=bbox,
        )

        # label font color
        for text in leg.get_texts():
            plt.setp(text, color="grey")

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units)
def facet_cat(self, df, feature, label_rotate=0, x_units="s", y_units="f", bbox=(1.2, 0.9), alpha=0.8,
              legend_labels=None, color_map="viridis", ax=None):
    """
    Documentation:

        ---
        Description:
            Creates a count plot for a categorical variable and facets the variable by another
            categorical variable. The first column of df supplies the x-axis category labels,
            and every remaining column is drawn as its own series of bars.

        ---
        Parameters:
            df : Pandas DataFrame
                Pandas DataFrame containing data for plotting. First column holds the category
                labels; each subsequent column holds the bar heights for one series.
            feature : str
                Name of column that contains the category values to be used for faceting.
                NOTE(review): not used to select data in this implementation; all columns
                after the first are plotted.
            label_rotate : float or int, default=0
                Number of degrees to rotate the x-tick labels.
            x_units : str, default='s'
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places. The passed value is
                overridden: 'f' is used when the first column of df is a float dtype, otherwise
                's'.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places.
            bbox : tuple of floats, default=(1.2, 0.9)
                Coordinates for determining legend position.
            alpha : float, default=0.8
                Controls transparency of objects. Accepts value between 0.0 and 1.0.
            legend_labels : list, default=None
                Custom legend labels. When None, integer positions 0..n-1 are used.
            color_map : str or list/tuple of colors, default="viridis"
                Either the name of a built-in matplotlib colormap, from which one color per
                series is generated, or an explicit sequence of colors.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.

        ---
        Raises:
            TypeError
                If color_map is neither a string nor a list/tuple.
    """
    if ax is None:
        ax = self.ax

    n_categories = df.shape[0]
    ixs = np.arange(n_categories)
    bar_width = 0.35

    # one series of bar heights per column after the first. the comprehension variable is
    # deliberately not named "feature" so the parameter is not shadowed
    feature_dict = {col: df[col].values.tolist() for col in df.columns[1:]}

    # generate color list
    if isinstance(color_map, str):
        color_list = style.color_gen(name=color_map, num=len(feature_dict))
    elif isinstance(color_map, (list, tuple)):
        color_list = list(color_map)
    else:
        raise TypeError(
            "color_map must be a colormap name or a list of colors, got {}".format(
                type(color_map).__name__
            )
        )

    # draw each series side by side, offset by one bar width per series
    for feature_ix, (series_name, heights) in enumerate(feature_dict.items()):
        ax.bar(
            ixs + (bar_width * feature_ix),
            heights,
            bar_width,
            alpha=alpha,
            color=color_list[feature_ix],
            label=str(series_name),
        )

    # place ticks and wrap long x-tick labels
    ax.set_xticks(ixs + bar_width / 2)
    ax.set_xticklabels(
        [
            "\n".join(textwrap.wrap(str(i).replace("_", " "), 12))
            for i in df.iloc[:, 0].values
        ],
        rotation=label_rotate,
    )

    ## create custom legend
    # create labels
    if legend_labels is None:
        legend_labels = np.arange(len(color_list))
    else:
        legend_labels = np.array(legend_labels)

    # pair labels with colors
    label_color = dict(zip(legend_labels, color_list))

    # create legend Patches
    patches = [
        Patch(color=v, label=k, alpha=alpha) for k, v in label_color.items()
    ]

    # draw legend on the axes being decorated
    leg = ax.legend(
        handles=patches,
        fontsize=0.95 * self.chart_scale,
        loc="upper right",
        markerscale=0.3 * self.chart_scale,
        ncol=1,
        bbox_to_anchor=bbox,
    )

    # label font color
    for text in leg.get_texts():
        plt.setp(text, color="grey")

    ### general formatting
    # if the category column is a float dtype, format as a number; otherwise as a string.
    # dtype.kind is used because the np.float alias no longer exists in current NumPy
    if getattr(df.iloc[:, 0].values.dtype, "kind", "") == "f":
        x_units = "f"
    else:
        x_units = "s"

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units)

    # tick label font size
    ax.tick_params(axis="both", colors=style.style_grey, labelsize=1.2 * self.chart_scale)

    # dynamically shrink x-axis labels as the number of categories grows
    if 7 < n_categories <= 10:
        ax.tick_params(axis="x", colors=style.style_grey, labelsize=0.9 * self.chart_scale)
    elif 10 < n_categories <= 20:
        ax.tick_params(axis="x", colors=style.style_grey, labelsize=0.75 * self.chart_scale)
    elif n_categories > 20:
        ax.tick_params(axis="x", colors=style.style_grey, labelsize=0.6 * self.chart_scale)
def box_plot_h(self, x, y, data, color=style.style_grey, x_units="f", bbox=(1.05, 1), color_map="viridis",
               suppress_outliers=False, alpha=0.8, legend_labels=None, ax=None):
    """
    Documentation:

        ---
        Description:
            Create horizontal box plots. Useful for evaluating an object target on the y-axis
            vs. a numeric independent variable on the x-axis.

        ---
        Parameters:
            x : str
                Name of the column plotted along the x-axis.
            y : str
                Name of the column plotted along the y-axis. Also used as the hue, so each
                distinct value gets its own box color and legend entry.
            data : Pandas DataFrame
                Pandas DataFrame including both x and y data.
            color : str (some sort of color code), default=style.style_grey
                NOTE(review): not used by this implementation; box colors are generated from
                color_map.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places.
            bbox : tuple of floats, default=(1.05, 1.0)
                Coordinates for determining legend position.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
            suppress_outliers : boolean, default=False
                When True, outlier points are removed from the box/whisker plots.
            alpha : float, default=0.8
                Controls transparency of boxes. Accepts value between 0.0 and 1.0.
            legend_labels : list, default=None
                Custom legend labels. When None, the unique values of the y column are used.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # create horizontal box plot. showfliers is the inverse of suppress_outliers: fliers are
    # shown unless the caller asks for them to be suppressed
    sns.boxplot(
        x=x,
        y=y,
        hue=y,
        data=data,
        orient="h",
        palette=sns.color_palette(
            style.color_gen(color_map, num=len(np.unique(data[y].values)))
        ),
        showfliers=not suppress_outliers,
        ax=ax,
    ).set(xlabel=None, ylabel=None)

    # fade box plot figures by reducing alpha
    plt.setp(ax.artists, alpha=alpha)

    # the legend identifies each box, so the y-axis itself is hidden
    ax.yaxis.set_visible(False)

    # tick label font size
    ax.tick_params(axis="both", colors=style.style_grey, labelsize=1.2 * self.chart_scale)

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units)

    ## custom legend
    # use legend labels if provided, otherwise use unique values in y column
    if legend_labels is None:
        legend_labels = np.unique(data[y].values)
    else:
        legend_labels = np.array(legend_labels)

    # generate one color per label and pair them up
    color_list = style.color_gen(color_map, num=len(legend_labels))
    label_color = dict(zip(legend_labels, color_list))

    # create legend Patches
    patches = [Patch(color=v, label=k, alpha=alpha) for k, v in label_color.items()]

    # draw legend on the axes being decorated
    leg = ax.legend(
        handles=patches,
        fontsize=1.0 * self.chart_scale,
        loc="upper right",
        markerscale=0.5 * self.chart_scale,
        ncol=1,
        bbox_to_anchor=bbox,
    )

    # label font color
    for text in leg.get_texts():
        plt.setp(text, color="grey")
def box_plot_v(self, x, y, data, color, label_rotate=0, y_units="f", color_map="viridis", alpha=0.8,
               suppress_outliers=False, ax=None):
    """
    Documentation:

        ---
        Description:
            Create vertical box plots. Useful for evaluating a numeric variable on the y-axis
            versus several different category segments on the x-axis.

        ---
        Parameters:
            x : str
                Name of categorical variable.
            y : str
                Name of numeric variable.
            data : Pandas DataFrame
                Pandas DataFrame including both x and y data.
            color : str
                NOTE(review): not used by this implementation; box colors are generated from
                color_map, one per unique value of the x column.
            label_rotate : float or int, default=0
                Number of degrees to rotate the x-tick labels.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g. 'ff' or 'ddd') for additional decimal places.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots.
            alpha : float, default=0.8
                Controls transparency of objects. Accepts value between 0.0 and 1.0.
            suppress_outliers : boolean, default=False
                When True, outlier points are removed from the box/whisker plots.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # create vertical box plot. showfliers is the inverse of suppress_outliers: fliers are
    # shown unless the caller asks for them to be suppressed
    sns.boxplot(
        x=x,
        y=y,
        data=data,
        orient="v",
        palette=sns.color_palette(
            style.color_gen(color_map, num=len(np.unique(data[x].values)))
        ),
        showfliers=not suppress_outliers,
        ax=ax,
    ).set(xlabel=None, ylabel=None)

    # tick label font size for both axes
    ax.tick_params(axis="both", colors=style.style_grey, labelsize=1.2 * self.chart_scale)

    # shrink x-axis labels as the number of categories grows
    n_levels = len(np.unique(data[x]))
    if 10 < n_levels <= 20:
        x_label_scale = 1.0
    elif n_levels > 20:
        x_label_scale = 0.9
    else:
        x_label_scale = 1.2
    ax.tick_params(
        axis="x", colors=style.style_grey, labelsize=x_label_scale * self.chart_scale
    )

    # fade box plot figures by reducing alpha
    plt.setp(ax.artists, alpha=alpha)

    # rotate x-tick labels on the axes being decorated
    plt.setp(ax.get_xticklabels(), rotation=label_rotate)

    ax.yaxis.set_visible(True)

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, y_units=y_units)
def stacked_bar_h(self, df, label_rotate=0, x_units="p", alpha=0.8, color_map="viridis",
                    bbox=(1.2, 0.9), legend_labels=None, ax=None):
    """
    Documentation:

        ---
        Description:
            Create horizontal stacked bar plot. Each column of df becomes one bar on the
            y-axis, and each row of df becomes one stacked segment within every bar.

        ---
        Parameters:
            df : Pandas DataFrame
                Values to plot. Rows are looked up by the integer labels 0..len(df.index)-1,
                so the index is expected to carry those labels. Column names are used as the
                y-axis tick labels.
            label_rotate : float or int, default=0
                Number of degrees to rotate the x-tick labels.
            x_units : str, default='p'
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g 'ff' or 'ddd') for additional decimal places.
            alpha : float, default=0.8
                Controls transparency of bars. Accepts value between 0.0 and 1.0.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots. One color is generated per row of df.
            bbox : tuple of floats, default=(1.2, 0.9)
                Coordinates for determining legend position.
            legend_labels : list, default=None
                Custom legend labels, one per row of df. When None, the integers
                0..n-1 are used.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # one color per class (row of df), one bar position per category (column of df)
    n_classes = len(df.index)
    color_list = style.color_gen(color_map, num=n_classes)
    category_levels = np.arange(len(df.columns))

    # plot stacked bars. each segment starts where the rows with smaller labels end; for the
    # first class that selection is empty and sums to zero.
    for class_label, bar_color in zip(range(n_classes), color_list):
        ax.barh(
            y=category_levels,
            width=df.loc[class_label],
            left=df.loc[df.index < class_label].sum(axis=0),
            color=bar_color,
            alpha=alpha,
        )

    # style x-axis tick labels. the label list is passed through unchanged: multiplying a
    # list of Text objects by 100 repeats the list, it does not rescale any values.
    # presumably unit formatting is applied by util.util_label_formatter below.
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=label_rotate,
        color=style.style_grey,
    )

    ## create custom legend
    if legend_labels is None:
        legend_labels = range(len(color_list))

    # create legend Patches, pairing each label with the color of its class
    patches = [
        Patch(color=patch_color, label=patch_label, alpha=alpha)
        for patch_label, patch_color in zip(legend_labels, color_list)
    ]

    # draw legend
    leg = ax.legend(
        handles=patches,
        fontsize=0.95 * self.chart_scale,
        loc="upper right",
        markerscale=0.3 * self.chart_scale,
        ncol=1,
        bbox_to_anchor=bbox,
    )

    # label font color
    for text in leg.get_texts():
        text.set_color("grey")

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, x_units=x_units)

    # overwrite y-axis labels with category labels. column names that can be read as
    # integers are shown as integers; anything else is shown as-is. the builtin int is used
    # because the np.int alias no longer exists in current NumPy releases.
    try:
        columns = df.columns.map(int)
    except (ValueError, TypeError):
        columns = df.columns

    # dynamically size y-labels
    n_categories = len(category_levels)
    if 7 < n_categories <= 10:
        ax.tick_params(axis="y", colors=style.style_grey, labelsize=0.9 * self.chart_scale)
    elif 10 < n_categories <= 20:
        ax.tick_params(axis="y", colors=style.style_grey, labelsize=0.75 * self.chart_scale)
    elif n_categories > 20:
        ax.tick_params(axis="y", colors=style.style_grey, labelsize=0.6 * self.chart_scale)

    ax.tick_params(axis="x", colors=style.style_grey, labelsize=1.2 * self.chart_scale)

    # wrap long y-tick labels at 12 characters, showing underscores as spaces
    ax.set_yticks(category_levels)
    ax.set_yticklabels(
        [
            "\n".join(textwrap.wrap(str(i).replace("_", " "), 12))
            for i in columns
        ]
    )
def bar_v(self, x, counts, color=style.style_grey, x_labels=None, x_tick_wrap=False, label_rotate=0,
            y_units="f", alpha=0.8, ax=None):
    """
    Documentation:

        ---
        Description:
            Create vertical bar plot.

        ---
        Parameters:
            x : array
                1-dimensional array of values to plot on x-axis representing distinct categories.
            counts : array or string
                1-dimensional array of value counts for categories.
            color : str (some sort of color code), default=style.style_grey
                Bar color.
            x_labels : list, default=None
                Custom x_axis text labels. When None, the values of x are used as labels.
            x_tick_wrap : bool, default=False
                Wrap x_axis tick labels at 12 characters, showing underscores as spaces.
                Non-string labels are converted with str() before wrapping.
            label_rotate : float or int, default=0
                Number of degrees to rotate the x-tick labels.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g 'ff' or 'ddd') for additional decimal places.
            alpha : float, default=0.8
                Controls transparency of bars. Accepts value between 0.0 and 1.0.
            ax : axes object, default=None
                Axis on which to place visual. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # custom labels
    labels = x_labels if x_labels is not None else x

    # create vertical bar plot on the target axis
    ax.bar(
        x=x,
        height=counts,
        color=color,
        tick_label=labels,
        alpha=alpha,
    )

    # rotate x-tick labels
    plt.setp(ax.get_xticklabels(), rotation=label_rotate)

    # dynamically resize x_axis labels based on the number of bars
    n_bars = len(x)
    if n_bars > 20:
        x_label_scale = 0.9
    elif n_bars > 10:
        x_label_scale = 1.0
    else:
        x_label_scale = 1.2
    ax.tick_params(
        axis="x", colors=style.style_grey, labelsize=x_label_scale * self.chart_scale
    )

    # wrap long x-tick labels. str() lets numeric or mixed labels go through the same path
    # instead of silently abandoning the wrap on the first non-string label.
    if x_tick_wrap:
        wrapped_labels = [
            "\n".join(textwrap.wrap(str(i).replace("_", " "), 12)) for i in labels
        ]
        ax.set_xticklabels(wrapped_labels)

    # style y-ticklabels. the label list is passed through unchanged: multiplying a list of
    # Text objects by 100 repeats the list, it does not rescale any values.
    # presumably unit formatting is applied by util.util_label_formatter below.
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.2 * self.chart_scale,
        color=style.style_grey,
    )

    # use label formatter utility function to customize chart labels
    util.util_label_formatter(ax=ax, y_units=y_units)
def line(self, x, y, label=None, df=None, linecolor=style.style_grey, linestyle=None, bbox=(1.2, 0.9),
            x_units="f", x_ticks=None, y_units="f", y_ticks=None, marker_on=False, plot_buffer=False,
            axis_limits=False, ax=None):
    """
    Documentation:

        ---
        Description:
            Create single line plot.

        ---
        Parameters:
            x : list, array or string
                1-dimensional array of values to plot along x-axis. Note that when df is
                provided, the x values are taken from df's index and this argument is not used.
            y : list, array or string
                1-dimensional array of values to plot along y-axis, or the name of a column
                in df when df is provided.
            label : str : default=None
                Legend label for line. A legend is drawn only when a label is given.
            df : Pandas DataFrame, default=None
                Pandas DataFrame containing data to plot. Can be any size; the plotted y values
                are selected by the column name passed in y.
            linecolor : str, default=style.style_grey
                Line color.
            linestyle : str, default=None
                Line style.
            bbox : tuple of floats, default=(1.2, 0.9)
                Coordinates for determining legend position.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g 'ff' or 'ddd') for additional decimal places.
            x_ticks : array, default=None
                Custom x-tick locations.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g 'ff' or 'ddd') for additional decimal places. When 'p' is present, y
                values are multiplied by 100 before plotting.
            y_ticks : array, default=None
                Custom y-tick locations.
            marker_on : bool, default=False
                Controls whether to show line with markers for each data element.
            plot_buffer : bool, default=False
                Controls whether dynamic plot buffer function is executed to ensure visual
                elements are not cut-off at the figure borders.
            axis_limits : bool, default=False
                Controls whether dynamic axis limit setting function is executed.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # if a Pandas DataFrame is passed to function, create x and y arrays from it
    if df is not None:
        # NOTE(review): a DataFrame index is always a pd.Index, so x always comes from the
        # index here and the column named by x is never read - confirm this is intended.
        if isinstance(df.index, pd.Index):
            x = df.index.values
        else:
            x = df[x].values
        y = df[y].values
    else:
        # accept lists, tuples, Series and arrays alike
        x = np.asarray(x)
        y = np.asarray(y)

    # reshape 1-dimensional arrays into column vectors
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    # add line to plot; percentages are plotted on a 0-100 scale
    ax.plot(
        x,
        y * 100 if "p" in y_units else y,
        color=linecolor,
        linestyle=linestyle,
        linewidth=0.247 * self.chart_scale,
        label=label,
        marker="." if marker_on else None,
        markersize=17 if marker_on else None,
        markerfacecolor="w" if marker_on else None,
        markeredgewidth=2.2 if marker_on else None,
    )

    # add legend to figure
    if label is not None:
        ax.legend(
            loc="upper right",
            bbox_to_anchor=bbox,
            ncol=1,
            frameon=True,
            fontsize=1.1 * self.chart_scale,
        )

    # optionally set axis lower / upper limits
    if axis_limits:
        x_min, x_max, y_min, y_max = util.util_set_axes(x=x, y=y)
        ax.axis([x_min, x_max, y_min, y_max])

    # optionally create smaller buffer around plot area to prevent cutting off elements
    if plot_buffer:
        util.util_plot_buffer(ax=ax, x=0.02, y=0.02)

    # optionally set custom x-tick locations
    if x_ticks is not None:
        ax.set_xticks(x_ticks)

    # optionally set custom y-tick locations
    if y_ticks is not None:
        ax.set_yticks(y_ticks)

    # style x and y ticklabels. the label lists are passed through unchanged: multiplying a
    # list of Text objects by 100 repeats the list, it does not rescale any values.
    # presumably unit formatting is applied by util.util_label_formatter below.
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.0 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.0 * self.chart_scale,
        color=style.style_grey,
    )

    # axis tick label formatting
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units)
def multi_line(self, x, y, label=None, df=None, linecolor=None, linestyle=None, bbox=(1.2, 0.9),
                x_units="f", x_ticks=None, y_units="f", y_ticks=None, marker_on=False,
                plot_buffer=False, axis_limits=False, color_map="viridis", ax=None):
    """
    Documentation:

        ---
        Description:
            Create single plot with multiple lines. Every column of y is drawn as its own
            line against the same x values.

        ---
        Parameters:
            x : array or string
                Either 1-dimensional array of values, a multidimensional array of values, a
                list of columns in a Pandas DataFrame, or a column name in a Pandas DataFrame.
                Note that when df is provided, the x values are taken from df's index and this
                argument is not used.
            y : array or string
                Either 1-dimensional array of values, a multidimensional array of values, a
                list of columns in a Pandas DataFrame, or a column name in a Pandas DataFrame.
            label : list of strings : default=None
                Custom legend label for each line, in the same order as the columns of y. A
                legend is drawn only when labels are given.
            df : Pandas DataFrame, default=None
                Pandas DataFrame containing data to plot. Can be any size; the plotted y values
                are selected by the column name(s) passed in y.
            linecolor : str, default=None
                Line color applied to every line. If None, utilizes color_map.
            linestyle : str, default=None
                Line style. If None, the first entry of style.style_line_style is used.
            bbox : tuple, default=(1.2, 0.9)
                Coordinates for determining legend position.
            x_units : str, default='f'
                Determines unit of measurement for x-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g 'ff' or 'ddd') for additional decimal places.
            x_ticks : array, default=None
                Custom x-tick locations.
            y_units : str, default='f'
                Determines unit of measurement for y-axis tick labels. 's' displays string. 'f'
                displays float. 'p' displays percentages, 'd' displays dollars. Repeat character
                (e.g 'ff' or 'ddd') for additional decimal places. When 'p' is present, y
                values are multiplied by 100 before plotting.
            y_ticks : array, default=None
                Custom y-tick locations.
            marker_on : bool, default=False
                Controls whether to show line with markers for each data element.
            plot_buffer : bool, default=False
                Controls whether dynamic plot buffer function is executed to ensure visual
                elements are not cut-off at the figure borders.
            axis_limits : bool, default=False
                Controls whether dynamic axis limit setting function is executed.
            color_map : str specifying built-in matplotlib colormap, default="viridis"
                Color map applied to plots. One color is generated per column of y.
            ax : axes object, default=None
                Axis object for the visualization. Falls back to self.ax when None.
    """
    if ax is None:
        ax = self.ax

    # if a Pandas DataFrame is passed to function, create x and y arrays from it
    if df is not None:
        # NOTE(review): a DataFrame index is always a pd.Index, so x always comes from the
        # index here and the column named by x is never read - confirm this is intended.
        if isinstance(df.index, pd.Index):
            x = df.index.values
        else:
            x = df[x].values
        y = df[y].values
    else:
        # accept lists, tuples, Series and arrays alike
        x = np.asarray(x)
        y = np.asarray(y)

    # reshape 1-dimensional arrays into column vectors so y always has a column axis
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    if y.ndim == 1:
        y = y.reshape(-1, 1)

    # generate color list, one color per line
    n_lines = y.shape[1]
    color_list = style.color_gen(name=color_map, num=n_lines)

    # add multiple lines to plot; percentages are plotted on a 0-100 scale
    for ix in range(n_lines):
        y_col = y[:, ix]
        ax.plot(
            x,
            y_col * 100 if "p" in y_units else y_col,
            color=linecolor if linecolor is not None else color_list[ix],
            linestyle=linestyle if linestyle is not None else style.style_line_style[0],
            linewidth=0.247 * self.chart_scale,
            label=label[ix] if label is not None else None,
            marker="." if marker_on else None,
            markersize=17 if marker_on else None,
            markerfacecolor="w" if marker_on else None,
            markeredgewidth=2.2 if marker_on else None,
        )

    # add legend to figure
    if label is not None:
        ax.legend(
            loc="upper right",
            bbox_to_anchor=bbox,
            ncol=1,
            frameon=True,
            fontsize=1.1 * self.chart_scale,
        )

    # optionally set axis lower / upper limits
    if axis_limits:
        x_min, x_max, y_min, y_max = util.util_set_axes(x=x, y=y)
        ax.axis([x_min, x_max, y_min, y_max])

    # optionally create smaller buffer around plot area to prevent cutting off elements
    if plot_buffer:
        util.util_plot_buffer(ax=ax, x=0.02, y=0.02)

    # optionally set custom x-tick locations
    if x_ticks is not None:
        ax.set_xticks(x_ticks)

    # optionally set custom y-tick locations
    if y_ticks is not None:
        ax.set_yticks(y_ticks)

    # style x and y ticklabels. the label lists are passed through unchanged: multiplying a
    # list of Text objects by 100 repeats the list, it does not rescale any values.
    # presumably unit formatting is applied by util.util_label_formatter below.
    ax.set_yticklabels(
        ax.get_yticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )
    ax.set_xticklabels(
        ax.get_xticklabels(),
        rotation=0,
        fontsize=1.1 * self.chart_scale,
        color=style.style_grey,
    )

    # axis tick label formatting
    util.util_label_formatter(ax=ax, x_units=x_units, y_units=y_units)