def makeFigure8(formulation, LHsamples, paramBounds, normSamples, thresholds):
    """Plot water-level series of the 100-yr flood along four SOW trajectories.

    For each of the four scenarios a subplot is drawn with five filled
    water-level curves (one per point along the trajectory), all simulated
    with ``formulation.bestFloodSoln``. The figure is written to
    ``Figure8.pdf`` in the current working directory.

    Parameters
    ----------
    formulation : object
        Must expose ``bestFloodSoln.solnNo``; that solution number is passed
        to ``getSoln``.
    LHsamples, paramBounds, normSamples, thresholds
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    None
    """
    # scenarios for event plots
    scenarios = ['Mean', 'C1', 'Std', 'All3_Flood']
    titles = [
        'SOW Trajectory 1', 'SOW Trajectory 2', 'SOW Trajectory 3',
        'SOW Trajectory 4'
    ]
    returnPds = 100.0
    pctiles = 1 / returnPds
    # presumably 1000 simulated years per solution, so the 100-yr event is
    # the 10th largest annual maximum -- TODO confirm against getSoln
    IDs = pctiles * 1000
    rank = int(IDs) - 1  # zero-based position in the descending ordering
    colors = ['#e31a1c', '#fb9a99', '#f7f7f7', '#a6cee3', '#1f78b4']
    ylabel = 'Water Level (m)'
    solnNo = formulation.bestFloodSoln.solnNo
    ymax = 18.0
    days = range(0, 365)

    sns.set()
    fig = plt.figure()

    # plot 100-yr event for each pathway through the SOW space
    for k in range(4):  # 4 trajectories
        # load simulations from most robust solution across scenarios and
        # find year of 100-yr event
        ax = fig.add_subplot(1, 4, k + 1)
        for i in range(5):  # 5 pts along trajectory
            soln = getSoln(solnNo, scenarios[k], i + 1)
            # assumes HanoiLev is (n_years, 365) -- TODO confirm
            levels = soln.HanoiLev
            maxFloods = np.max(levels, 1)
            year = np.argsort(maxFloods)[::-1][rank]
            yvalues = levels[year, :]
            colour = colors[-(1 + i)]
            ax.plot(days, yvalues, c=colour, linewidth=2)
            ax.fill_between(days, yvalues, color=colour, zorder=5 - i)

        ax.plot([0, 364], [13.4, 13.4], c='k', linewidth=2)  # dike height
        ax.set_xlim([0, 364])
        ax.set_ylim([0, ymax])
        ax.set_xticks([45, 137, 229, 319])
        ax.set_xticklabels(['Jun', 'Sep', 'Dec', 'Mar'], fontsize=18)
        if k == 0:
            ax.set_ylabel(ylabel, fontsize=22)
            ax.tick_params(axis='y', labelsize=18)
        else:
            # A boolean is required here: the legacy string 'off' is truthy
            # in current matplotlib and would leave the labels visible.
            ax.tick_params(axis='y', labelleft=False)
        ax.set_title(titles[k], fontsize=22)

    fig.suptitle(
        'Water level time series during 100-yr flood with most robust solution for flooding',
        fontsize=22)
    fig.set_size_inches([26.4, 6.1])
    fig.savefig('Figure8.pdf')
    fig.clf()
    # Release the figure from pyplot's registry; clf() alone keeps it alive.
    plt.close(fig)
    return None
def plot_activity_matrix(df, cmap, normalized=False, annotate=True, out_path='', title=''):
    """
    Draw the land-use transition (activity) matrix as a heat map and save it.

    Rows are labelled 'FROM' and columns 'TO'. After saving, the seaborn
    overrides are reverted and the project's matplotlib parameters and colour
    palette are re-applied.

    :param df: Matrix passed to ``sns.heatmap``
    :param cmap: Colormap passed to ``sns.heatmap``
    :param normalized: If True, values are multiplied by 100 and the colorbar
        uses plain (non-scientific) tick formatting
    :param annotate: If True, write the value inside each cell
    :param out_path: Destination passed to ``plt.savefig``
    :param title: Axes title
    :return: None
    """
    logger.info('Plot activity matrix')
    sns.set(font_scale=0.8)

    cbar_format = tkr.ScalarFormatter(useMathText=True)
    # normalized scale is from 0 - 100, does not need scientific scale
    if not normalized:
        cbar_format.set_scientific(True)
        cbar_format.set_powerlimits((-2, 2))

    scale = 100.0 if normalized else 1.0
    df = df * scale

    # Integer colour limits; the upper one is the maximum value on colorbar
    lower = math.ceil(np.nanmin(df))
    upper = math.ceil(np.nanmax(df))

    heatmap_options = dict(
        cbar_kws={'format': cbar_format},
        cmap=cmap,
        linewidths=.5,
        linecolor='lightgray',
        annot=annotate,
        fmt='.2g',
        annot_kws={'size': 6},
        vmin=lower,
        vmax=upper,
    )
    ax = sns.heatmap(df, **heatmap_options)

    ax.set_title(title)
    ax.set_xlabel('TO')
    ax.set_ylabel('FROM')

    # Keep tick labels horizontal on both axes
    for tick_getter in (plt.xticks, plt.yticks):
        _, tick_labels = tick_getter()
        plt.setp(tick_labels, rotation=0)

    plt.savefig(out_path, dpi=constants.DPI)
    plt.close()

    # revert matplotlib params
    sns.reset_orig()
    set_matplotlib_params()
    get_colors(palette='tableau')
def plot_qq(clf, X, y, figsize=(7, 7)):
    """Draw a quantile-quantile plot of a fitted model's raw residuals.

    The residuals are compared against a fitted ``scipy.stats.t``
    distribution and drawn with a 45-degree reference line. The plot is
    shown via ``plt.show()`` and seaborn's style overrides are reverted
    afterwards, whether or not plotting succeeded.

    Parameters
    ----------
    clf : sklearn.linear_model
        A fitted linear model whose type is listed in
        ``_utils.supported_linear_models``.
    X : numpy.ndarray
        Training data used to fit the model.
    y : numpy.ndarray
        Target training values, of shape = [n_samples].
    figsize : tuple
        Figure size as (x-axis, y-axis). Defaults to (7, 7).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If ``clf`` is not one of the supported linear model types.
    """
    # Only model types that have been tested are accepted
    supported = isinstance(clf, _utils.supported_linear_models)
    assert supported, (
        "Classifiers of type {0} not currently supported.".format(type(clf)))

    raw_residuals = stats.residuals(clf, X, y, r_type='raw')
    qq = sm.ProbPlot(raw_residuals, scipy.stats.t, fit=True)

    # Plot styling
    sns.set_style("darkgrid")
    sns.set(font_scale=1.2)

    try:
        # qqplot ignores figure size, so create the figure up front
        fig, axes = plt.subplots(figsize=figsize)
        qq.qqplot(line='45', ax=axes)
        plt.title("Normal Quantile Plot")
        plt.xlabel("Theoretical Standardized Residuals")
        plt.ylabel("Actual Standardized Residuals")
        plt.show()
    finally:
        # Always restore the original style; any exception still propagates
        sns.reset_orig()

    return fig
import pandas as pd import matplotlib.pyplot as plt import seaborn.apionly as sns import time import warnings warnings.filterwarnings("ignore") from scipy import optimize import pymc3 as pm import theano as thno import theano.tensor as T # configure some basic options sns.set(style="darkgrid", palette="deep") # pd.set_option('display.notebook_repr_html', True) plt.rcParams["figure.figsize"] = 12, 8 np.random.seed(0) #### cut & pasted directly from the fetch_hogg2010test() function ## identical to the original dataset as hardcoded in the Hogg 2010 paper dfhogg = pd.DataFrame( np.array( [ [1, 201, 592, 61, 9, -0.84], [2, 244, 401, 25, 4, 0.31], [3, 47, 583, 38, 11, 0.64], [4, 287, 402, 15, 7, -0.27],
import pandas as pd
import numpy as np
from sklearn import linear_model
import seaborn.apionly as sns
import matplotlib.pyplot as plt

sns.set(style='whitegrid', context='notebook')

# Displaying original data
df = pd.read_csv("data/CHD.csv", header=0)
plt.figure()
plt.axis([0, 70, -0.2, 1.2])
plt.title("Original data")
plt.scatter(df['age'], df['chd'])  # Plot a scatter draw of the random data points
plt.show()

# Creating logistic regression model
logistic = linear_model.LogisticRegression(C=1e5)
# reshape(-1, 1) yields the single-feature column matrix for any number of
# rows (the row count used to be hard-coded to 100); the target is passed as
# a 1-D array, which is the shape scikit-learn expects for `y`.
logistic.fit(df['age'].values.reshape(-1, 1), df['chd'].values)
# (A pasted notebook echo of the estimator repr used to follow here; it only
# constructed and discarded a second, unfitted LogisticRegression.)

x_plot = np.linspace(10, 90, 100)
oneprob = []
zeroprob = []
predict = []
plt.figure(figsize=(10, 10))
for i in x_plot:
    temp_val = np.array(i).reshape(1, -1)
def contrastplot_test(data, x, y, idx=None, alpha=0.75, axis_title_size=None, barWidth=5, contrastShareY=True, contrastEffectSizeLineStyle='solid', contrastEffectSizeLineColor='black', contrastYlim=None, contrastZeroLineStyle='solid', contrastZeroLineColor='black', effectSizeYLabel="Effect Size", figsize=None, floatContrast=True, floatSwarmSpacer=0.2, heightRatio=(1, 1), idcol=None, lineWidth=2, legend=True, legendFontSize=14, legendFontProps={}, paired=False, pal=None, rawMarkerSize=8, rawMarkerType='o', reps=3000, showGroupCount=True, show95CI=False, showAllYAxes=False, showRawData=True, smoothboot=False, statfunction=None, summaryBar=False, summaryBarColor='grey', summaryBarAlpha=0.25, summaryColour='black', summaryLine=True, summaryLineStyle='solid', summaryLineWidth=0.25, summaryMarkerSize=10, summaryMarkerType='o', swarmShareY=True, swarmYlim=None, tickAngle=45, tickAlignment='right', violinOffset=0.375, violinWidth=0.2, violinColor='k', xticksize=None, yticksize=None, **kwargs): '''Takes a pandas dataframe and produces a contrast plot: either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot. ----------------------------------------------------------------------- Description of flags upcoming.''' # Check that `data` is a pandas dataframe if 'DataFrame' not in str(type(data)): raise TypeError( "The object passed to the command is not not a pandas DataFrame.\ Please convert it to a pandas DataFrame.") # Get and set levels of data[x] if idx is None: widthratio = [1] allgrps = np.sort(data[x].unique()) if paired: # If `idx` is not specified, just take the FIRST TWO levels alphabetically. tuple_in = tuple(allgrps[0:2], ) else: # No idx is given, so all groups are compared to the first one in the DataFrame column. 
tuple_in = (tuple(allgrps), ) if len(allgrps) > 2: floatContrast = False else: if all(isinstance(element, str) for element in idx): # if idx is supplied but not a multiplot (ie single list or tuple) tuple_in = (idx, ) widthratio = [1] if len(idx) > 2: floatContrast = False elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! tuple_in = idx if (any(len(element) > 2 for element in tuple_in)): # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False. floatContrast = False # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. widthratio = [] for i in tuple_in: widthratio.append(len(i)) else: raise TypeError( "The object passed to `idx` consists of a mixture of single strings and tuples. \ Please make sure that `idx` is either a tuple of column names, or a tuple of tuples for plotting." ) # initialise statfunction if statfunction == None: statfunction = np.mean # Create list to collect all the contrast DataFrames generated. contrastList = list() contrastListNames = list() # # Calculate the bootstraps according to idx. # for ix, current_tuple in enumerate(tuple_in): # bscontrast=list() # for i in range (1, len(current_tuple)): # # Note that you start from one. No need to do auto-contrast! # tempbs=bootstrap_contrast( # data=data, # x=x, # y=y, # idx=[current_tuple[0], current_tuple[i]], # statfunction=statfunction, # smoothboot=smoothboot, # reps=reps) # bscontrast.append(tempbs) # contrastList.append(tempbs) # contrastListNames.append(current_tuple[i]+' vs. '+current_tuple[0]) # Setting color palette for plotting. 
if pal is None: if 'hue' in kwargs: colorCol = kwargs['hue'] colGrps = data[colorCol].unique() nColors = len(colGrps) else: colorCol = x colGrps = data[x].unique() nColors = len([element for tupl in tuple_in for element in tupl]) plotPal = dict(zip(colGrps, sns.color_palette(n_colors=nColors))) else: plotPal = pal # Ensure summaryLine and summaryBar are not displayed together. if summaryLine is True and summaryBar is True: summaryBar = True summaryLine = False # Turn off summary line if floatContrast is true if floatContrast: summaryLine = False if swarmYlim is None: # get range of _selected groups_. u = list() for t in idx: for i in np.unique(t): u.append(i) u = np.unique(u) tempdat = data[data[x].isin(u)] swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])]) else: swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]]) if contrastYlim is not None: contrastYlim = np.array([contrastYlim[0], contrastYlim[1]]) barWidth = barWidth / 1000 # Not sure why have to reduce the barwidth by this much! if showRawData is True: maxSwarmSpan = 0.25 else: maxSwarmSpan = barWidth # Expand the ylim in both directions. ## Find half of the range of swarm_ylim. swarmrange = swarm_ylim[1] - swarm_ylim[0] pad = 0.1 * swarmrange x2 = np.array([swarm_ylim[0] - pad, swarm_ylim[1] + pad]) swarm_ylim = x2 # plot params if axis_title_size is None: axis_title_size = 25 if yticksize is None: yticksize = 18 if xticksize is None: xticksize = 18 # Set clean style sns.set(style='ticks') axisTitleParams = {'labelsize': axis_title_size} xtickParams = {'labelsize': xticksize} ytickParams = {'labelsize': yticksize} svgParams = {'fonttype': 'none'} rc('axes', **axisTitleParams) rc('xtick', **xtickParams) rc('ytick', **ytickParams) rc('svg', **svgParams) if figsize is None: if len(tuple_in) > 2: figsize = (12, (12 / np.sqrt(2))) else: figsize = (8, (8 / np.sqrt(2))) # Initialise figure, taking into account desired figsize. 
fig = plt.figure(figsize=figsize) # Initialise GridSpec based on `tuple_in` shape. gsMain = gridspec.GridSpec( 1, np.shape(tuple_in)[0], # 1 row; columns based on number of tuples in tuple. width_ratios=widthratio, wspace=0) for gsIdx, current_tuple in enumerate(tuple_in): #### FOR EACH TUPLE IN IDX plotdat = data[data[x].isin(current_tuple)] plotdat[x] = plotdat[x].astype("category") plotdat[x].cat.set_categories(current_tuple, ordered=True, inplace=True) plotdat.sort_values(by=[x]) # Drop all nans. plotdat = plotdat.dropna() # Calculate summaries. summaries = plotdat.groupby([x], sort=True)[y].apply(statfunction) if floatContrast is True: # Use fig.add_subplot instead of plt.Subplot ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False) ax_contrast = ax_raw.twinx() else: # Create subGridSpec with 2 rows and 1 column. subGridSpec = gridspec.GridSpecFromSubplotSpec( 2, 1, subplot_spec=gsMain[gsIdx], wspace=0) # Use plt.Subplot instead of fig.add_subplot ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False) ax_contrast = plt.Subplot(fig, subGridSpec[1, 0], sharex=ax_raw, frame_on=False) # Calculate the boostrapped contrast bscontrast = list() for i in range(1, len(current_tuple)): # Note that you start from one. No need to do auto-contrast! tempbs = bootstrap_contrast( data=data, x=x, y=y, idx=[current_tuple[0], current_tuple[i]], statfunction=statfunction, smoothboot=smoothboot, reps=reps) bscontrast.append(tempbs) contrastList.append(tempbs) contrastListNames.append(current_tuple[i] + ' vs. ' + current_tuple[0]) #### PLOT RAW DATA. if showRawData is True: # Seaborn swarmplot doc says to set custom ylims first. 
ax_raw.set_ylim(swarm_ylim) sw = sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if summaryBar is True: bar_raw = sns.barplot(x=summaries.index.tolist(), y=summaries.values, facecolor=summaryBarColor, ax=ax_raw, alpha=summaryBarAlpha) if floatContrast: # Get horizontal offset values. maxXBefore = max(sw.collections[0].get_offsets().T[0]) minXAfter = min(sw.collections[1].get_offsets().T[0]) xposAfter = maxXBefore + floatSwarmSpacer xAfterShift = minXAfter - xposAfter # shift the swarmplots offsetSwarmX(sw.collections[1], -xAfterShift) ## get swarm with largest span, set as max width of each barplot. for i, bar in enumerate(bar_raw.patches): x_width = bar.get_x() width = bar.get_width() centre = x_width + (width / 2.) if i == 0: bar.set_x(centre - maxSwarmSpan / 2.) else: bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.) bar.set_width(maxSwarmSpan) ## Set the ticks locations for ax_raw. ax_raw.xaxis.set_ticks((0, xposAfter)) firstTick = ax_raw.xaxis.get_ticklabels()[0].get_text() secondTick = ax_raw.xaxis.get_ticklabels()[1].get_text() ax_raw.set_xticklabels( [ firstTick, #+' n='+count[firstTick], secondTick ], #+' n='+count[secondTick]], rotation=tickAngle, horizontalalignment=tickAlignment) if summaryLine is True: for i, m in enumerate(summaries): ax_raw.plot( (i - summaryLineWidth, i + summaryLineWidth), # x-coordinates (m, m), color=summaryColour, linestyle=summaryLineStyle) if show95CI is True: sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95) ax_raw.set_xlabel("") if floatContrast is False: fig.add_subplot(ax_raw) #### PLOT CONTRAST DATA. if len(current_tuple) == 2: # Plot the CIs on the contrast axes. 
plotbootstrap(sw.collections[1], bslist=tempbs, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, offset=floatContrast, color=violinColor, linewidth=1) if floatContrast: # Set reference lines ## First get leftmost limit of left reference group xtemp, _ = np.array(sw.collections[0].get_offsets()).T leftxlim = xtemp.min() ## Then get leftmost limit of right test group xtemp, _ = np.array(sw.collections[1].get_offsets()).T rightxlim = xtemp.min() ## zero line ax_contrast.hlines( 0, # y-coordinates leftxlim, 3.5, # x-coordinates, start and end. linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) ## effect size line ax_contrast.hlines( tempbs['summary'], rightxlim, 3.5, # x-coordinates, start and end. linestyle=contrastEffectSizeLineStyle, linewidth=0.75, color=contrastEffectSizeLineColor) ## If the effect size is positive, shift the right axis up. if float(tempbs['summary']) > 0: rightmin = ax_raw.get_ylim()[0] - float(tempbs['summary']) rightmax = ax_raw.get_ylim()[1] - float(tempbs['summary']) ## If the effect size is negative, shift the right axis down. elif float(tempbs['summary']) < 0: rightmin = ax_raw.get_ylim()[0] + float(tempbs['summary']) rightmax = ax_raw.get_ylim()[1] + float(tempbs['summary']) ax_contrast.set_ylim(rightmin, rightmax) if gsIdx > 0: ax_contrast.set_ylabel('') align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.) else: # Set bottom axes ybounds if contrastYlim is not None: ax_contrast.set_ylim(contrastYlim) # Set xlims so everything is properly visible! swarm_xbounds = ax_raw.get_xbound() ax_contrast.set_xbound( swarm_xbounds[0] - (summaryLineWidth * 1.1), swarm_xbounds[1] + (summaryLineWidth * 1.1)) else: # Plot the CIs on the bottom axes. 
plotbootstrap_hubspoke(bslist=bscontrast, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, linewidth=lineWidth) if floatContrast is False: fig.add_subplot(ax_contrast) if gsIdx > 0: ax_raw.set_ylabel('') ax_contrast.set_ylabel('') # Turn contrastList into a pandas DataFrame, contrastList = pd.DataFrame(contrastList).T contrastList.columns = contrastListNames ######## axesCount = len(fig.get_axes()) ## Loop thru SWARM axes for aesthetic touchups. for i in range(0, axesCount, 2): axx = fig.axes[i] if i != axesCount - 2 and 'hue' in kwargs: # If this is not the final swarmplot, remove the hue legend. axx.legend().set_visible(False) if floatContrast is False: axx.xaxis.set_visible(False) sns.despine(ax=axx, trim=True, bottom=False, left=False) else: sns.despine(ax=axx, trim=True, bottom=True, left=True) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(showAllYAxes) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) # Add zero reference line for swarmplots with bars. if summaryBar is True: axx.add_artist( Line2D((axx.xaxis.get_view_interval()[0], axx.xaxis.get_view_interval()[1]), (0, 0), color='black', linewidth=0.75)) # I don't know why the swarm axes controls the contrast axes ticks.... if showGroupCount: count = data.groupby(x).count()[y] newticks = list() for ix, t in enumerate(axx.xaxis.get_ticklabels()): t_text = t.get_text() nt = t_text + ' n=' + str(count[t_text]) newticks.append(nt) axx.xaxis.set_ticklabels(newticks) if legend is False: axx.legend().set_visible(False) else: if i == axesCount - 2: # the last (rightmost) swarm axes. axx.legend(loc='top right', bbox_to_anchor=(1.1, 1.0), fontsize=legendFontSize, **legendFontProps) ## Loop thru the CONTRAST axes and perform aesthetic touch-ups. 
## Get the y-limits: for j, i in enumerate(range(1, axesCount, 2)): axx = fig.get_axes()[i] if floatContrast is False: xleft, xright = axx.xaxis.get_view_interval() # Draw zero reference line. axx.hlines(y=0, xmin=xleft - 1, xmax=xright + 1, linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) # reset view interval. axx.set_xlim(xleft, xright) # # Draw back x-axis lines connecting ticks. # drawback_x(axx) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) sns.despine(ax=axx, top=True, right=True, left=False, bottom=False, trim=True) # Rotate tick labels. rotateTicks(axx, tickAngle, tickAlignment) else: # Re-draw the floating axis to the correct limits. lower = np.min(contrastList.ix['diffarray', j]) upper = np.max(contrastList.ix['diffarray', j]) meandiff = contrastList.ix['summary', j] ## Make sure we have zero in the limits. if lower > 0: lower = 0. if upper < 0: upper = 0. ## Get the tick interval from the left y-axis. leftticks = fig.get_axes()[i - 1].get_yticks() tickstep = leftticks[1] - leftticks[0] ## First re-draw of axis with new tick interval axx.yaxis.set_major_locator(MultipleLocator(base=tickstep)) newticks1 = axx.get_yticks() ## Obtain major ticks that comfortably encompass lower and upper. newticks2 = list() for a, b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. if np.max(newticks2) < meandiff: ind = np.where(newticks1 == np.max(newticks2))[0][ 0] # find out the max tick index in newticks1. newticks2.append(newticks1[ind + 1]) elif meandiff < np.min(newticks2): ind = np.where(newticks1 == np.min(newticks2))[0][ 0] # find out the min tick index in newticks1. 
newticks2.append(newticks1[ind - 1]) newticks2 = np.array(newticks2) newticks2.sort() ## Second re-draw of axis to shrink it to desired limits. axx.yaxis.set_major_locator(FixedLocator(locs=newticks2)) ## Despine the axes. sns.despine(ax=axx, trim=True, bottom=False, right=False, left=True, top=True) # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots. if (axesCount > 2 and contrastShareY is True and floatContrast is False): # Set contrast ylim as max ticks of leftmost swarm axes. if contrastYlim is None: lower = list() upper = list() for c in range(0, len(contrastList.columns)): lower.append(np.min(contrastList.ix['bca_ci_low', c])) upper.append(np.max(contrastList.ix['bca_ci_high', c])) lower = np.min(lower) upper = np.max(upper) else: lower = contrastYlim[0] upper = contrastYlim[1] normalizeContrastY(fig, contrast_ylim=contrastYlim, show_all_yaxes=showAllYAxes) # if (axesCount==2 and # floatContrast is False): # drawback_x(fig.get_axes()[1]) # drawback_y(fig.get_axes()[1]) # if swarmShareY is False: # for i in range(0, axesCount, 2): # drawback_y(fig.get_axes()[i]) # if contrastShareY is False: # for i in range(1, axesCount, 2): # if floatContrast is True: # sns.despine(ax=fig.get_axes()[i], # top=True, right=False, left=True, bottom=True, # trim=True) # else: # sns.despine(ax=fig.get_axes()[i], trim=True) # Zero gaps between plots on the same row, if floatContrast is False if (floatContrast is False and showAllYAxes is False): gsMain.update(wspace=0.) else: # Tight Layout! gsMain.tight_layout(fig) # And we're all done. rcdefaults() # restore matplotlib defaults. sns.set() # restore seaborn defaults. return fig, contrastList
def pairedcontrast(data, x, y, idcol, reps = 3000, statfunction = None, idx = None, figsize = None, beforeAfterSpacer = 0.01, violinWidth = 0.005, floatOffset = 0.05, showRawData = False, showAllYAxes = False, floatContrast = True, smoothboot = False, floatViolinOffset = None, showConnections = True, summaryBar = False, contrastYlim = None, swarmYlim = None, barWidth = 0.005, rawMarkerSize = 8, rawMarkerType = 'o', summaryMarkerSize = 10, summaryMarkerType = 'o', summaryBarColor = 'grey', meansSummaryLineStyle = 'solid', contrastZeroLineStyle = 'solid', contrastEffectSizeLineStyle = 'solid', contrastZeroLineColor = 'black', contrastEffectSizeLineColor = 'black', pal = None, legendLoc = 2, legendFontSize = 12, legendMarkerScale = 1, axis_title_size = None, yticksize = None, xticksize = None, tickAngle=45, tickAlignment='right', **kwargs): # Preliminaries. data = data.dropna() # plot params if axis_title_size is None: axis_title_size = 15 if yticksize is None: yticksize = 12 if xticksize is None: xticksize = 12 axisTitleParams = {'labelsize' : axis_title_size} xtickParams = {'labelsize' : xticksize} ytickParams = {'labelsize' : yticksize} rc('axes', **axisTitleParams) rc('xtick', **xtickParams) rc('ytick', **ytickParams) ## If `idx` is not specified, just take the FIRST TWO levels alphabetically. if idx is None: idx = tuple(np.unique(data[x])[0:2],) else: # check if multi-plot or not if all(isinstance(element, str) for element in idx): # if idx is supplied but not a multiplot (ie single list or tuple) if len(idx) != 2: print(idx, "does not have length 2.") sys.exit(0) else: idx = (tuple(idx, ),) elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! if ( any(len(element) != 2 for element in idx) ): # If any of the tuples contain more than 2 elements. 
print(element, "does not have length 2.") sys.exit(0) if floatViolinOffset is None: floatViolinOffset = beforeAfterSpacer/2 if contrastYlim is not None: contrastYlim = np.array([contrastYlim[0],contrastYlim[1]]) if swarmYlim is not None: swarmYlim = np.array([swarmYlim[0],swarmYlim[1]]) ## Here we define the palette on all the levels of the 'x' column. ## Thus, if the same pandas dataframe is re-used across different plots, ## the color identity of each group will be maintained. ## Set palette based on total number of categories in data['x'] or data['hue_column'] if 'hue' in kwargs: u = kwargs['hue'] else: u = x if ('color' not in kwargs and 'hue' not in kwargs): kwargs['color'] = 'k' if pal is None: pal = dict( zip( data[u].unique(), sns.color_palette(n_colors = len(data[u].unique())) ) ) else: pal = pal # Initialise figure. if figsize is None: if len(idx) > 2: figsize = (12,(12/np.sqrt(2))) else: figsize = (6,6) fig = plt.figure(figsize = figsize) # Initialise GridSpec based on `levs_tuple` shape. gsMain = gridspec.GridSpec( 1, np.shape(idx)[0]) # 1 row; columns based on number of tuples in tuple. # Set default statfunction if statfunction is None: statfunction = np.mean # Create list to collect all the contrast DataFrames generated. contrastList = list() contrastListNames = list() for gsIdx, xlevs in enumerate(idx): ## Pivot tempdat to get before and after lines. data_pivot = data.pivot_table(index = idcol, columns = x, values = y) # Start plotting!! if floatContrast is True: ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on = False) ax_contrast = ax_raw.twinx() else: gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(2, 1, subplot_spec = gsMain[gsIdx]) ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on = False) ax_contrast = plt.Subplot(fig, gsSubGridSpec[1, 0], sharex = ax_raw, frame_on = False) ## Plot raw data as swarmplot or stripplot. 
if showRawData is True: swarm_raw = sns.swarmplot(data = data, x = x, y = y, order = xlevs, ax = ax_raw, palette = pal, size = rawMarkerSize, marker = rawMarkerType, **kwargs) else: swarm_raw = sns.stripplot(data = data, x = x, y = y, order = xlevs, ax = ax_raw, palette = pal, **kwargs) swarm_raw.set_ylim(swarmYlim) ## Get some details about the raw data. maxXBefore = max(swarm_raw.collections[0].get_offsets().T[0]) minXAfter = min(swarm_raw.collections[1].get_offsets().T[0]) if showRawData is True: #beforeAfterSpacer = (getSwarmSpan(swarm_raw, 0) + getSwarmSpan(swarm_raw, 1))/2 beforeAfterSpacer = 1 xposAfter = maxXBefore + beforeAfterSpacer xAfterShift = minXAfter - xposAfter ## shift the after swarmpoints closer for aesthetic purposes. offsetSwarmX(swarm_raw.collections[1], -xAfterShift) ## pandas DataFrame of 'before' group x1 = pd.DataFrame({str(xlevs[0] + '_x') : pd.Series(swarm_raw.collections[0].get_offsets().T[0]), xlevs[0] : pd.Series(swarm_raw.collections[0].get_offsets().T[1]), '_R_' : pd.Series(swarm_raw.collections[0].get_facecolors().T[0]), '_G_' : pd.Series(swarm_raw.collections[0].get_facecolors().T[1]), '_B_' : pd.Series(swarm_raw.collections[0].get_facecolors().T[2]), }) ## join the RGB columns into a tuple, then assign to a column. x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1) x1 = x1.sort_values(by = xlevs[0]) x1.index = data_pivot.sort_values(by = xlevs[0]).index ## pandas DataFrame of 'after' group ### create convenient signifiers for column names. befX = str(xlevs[0] + '_x') aftX = str(xlevs[1] + '_x') x2 = pd.DataFrame( {aftX : pd.Series(swarm_raw.collections[1].get_offsets().T[0]), xlevs[1] : pd.Series(swarm_raw.collections[1].get_offsets().T[1])} ) x2 = x2.sort_values(by = xlevs[1]) x2.index = data_pivot.sort_values(by = xlevs[1]).index ## Join x1 and x2, on both their indexes. plotPoints = x1.merge(x2, left_index = True, right_index = True, how='outer') ## Add the hue column if hue argument was passed. 
if 'hue' in kwargs: h = kwargs['hue'] plotPoints[h] = data.pivot(index = idcol, columns = x, values = h)[xlevs[0]] swarm_raw.legend(loc = legendLoc, fontsize = legendFontSize, markerscale = legendMarkerScale) ## Plot the lines to join the 'before' points to their respective 'after' points. if showConnections is True: for i in plotPoints.index: ax_raw.plot([ plotPoints.ix[i, befX], plotPoints.ix[i, aftX] ], [ plotPoints.ix[i, xlevs[0]], plotPoints.ix[i, xlevs[1]] ], linestyle = 'solid', color = plotPoints.ix[i, '_hue_'], linewidth = 0.75, alpha = 0.75 ) ## Hide the raw swarmplot data if so desired. if showRawData is False: swarm_raw.collections[0].set_visible(False) swarm_raw.collections[1].set_visible(False) if showRawData is True: #maxSwarmSpan = max(np.array([getSwarmSpan(swarm_raw, 0), getSwarmSpan(swarm_raw, 1)]))/2 maxSwarmSpan = 0.5 else: maxSwarmSpan = barWidth ## Plot Summary Bar. if summaryBar is True: # Calculate means means = data.groupby([x], sort = True).mean()[y] # # Calculate medians # medians = data.groupby([x], sort = True).median()[y] ## Draw summary bar. bar_raw = sns.barplot(x = means.index, y = means.values, order = xlevs, ax = ax_raw, ci = 0, facecolor = summaryBarColor, alpha = 0.25) ## Draw zero reference line. ax_raw.add_artist(Line2D( (ax_raw.xaxis.get_view_interval()[0], ax_raw.xaxis.get_view_interval()[1]), (0,0), color='black', linewidth=0.75 ) ) ## get swarm with largest span, set as max width of each barplot. for i, bar in enumerate(bar_raw.patches): x_width = bar.get_x() width = bar.get_width() centre = x_width + width/2. if i == 0: bar.set_x(centre - maxSwarmSpan/2.) else: bar.set_x(centre - xAfterShift - maxSwarmSpan/2.) bar.set_width(maxSwarmSpan) # Get y-limits of the treatment swarm points. 
beforeRaw = pd.DataFrame( swarm_raw.collections[0].get_offsets() ) afterRaw = pd.DataFrame( swarm_raw.collections[1].get_offsets() ) before_leftx = min(beforeRaw[0]) after_leftx = min(afterRaw[0]) after_rightx = max(afterRaw[0]) after_stat_summary = statfunction(beforeRaw[1]) # Calculate the summary difference and CI. plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]] plotPoints['delta_x'] = [0] * np.shape(plotPoints)[0] tempseries = plotPoints['delta_y'].tolist() test = tempseries.count(tempseries[0]) != len(tempseries) bootsDelta = bootstrap(plotPoints['delta_y'], statfunction = statfunction, smoothboot = smoothboot, reps = reps) summDelta = bootsDelta['summary'] lowDelta = bootsDelta['bca_ci_low'] highDelta = bootsDelta['bca_ci_high'] # set new xpos for delta violin. if floatContrast is True: if showRawData is False: xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset else: xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan else: xposPlusViolin = xposAfter if showRawData is True: # If showRawData is True and floatContrast is True, # set violinwidth to the barwidth. violinWidth = maxSwarmSpan xmaxPlot = xposPlusViolin + violinWidth # Plot the summary measure. ax_contrast.plot(xposPlusViolin, summDelta, marker = 'o', markerfacecolor = 'k', markersize = summaryMarkerSize, alpha = 0.75 ) # Plot the CI. ax_contrast.plot([xposPlusViolin, xposPlusViolin], [lowDelta, highDelta], color = 'k', alpha = 0.75, linestyle = 'solid' ) # Plot the violin-plot. v = ax_contrast.violinplot(bootsDelta['stat_array'], [xposPlusViolin], widths = violinWidth, showextrema = False, showmeans = False) halfviolin(v, half = 'right', color = 'k') # Remove left axes x-axis title. ax_raw.set_xlabel("") # Remove floating axes y-axis title. 
ax_contrast.set_ylabel("") # Set proper x-limits ax_raw.set_xlim(before_leftx - beforeAfterSpacer/2, xmaxPlot) ax_raw.get_xaxis().set_view_interval(before_leftx - beforeAfterSpacer/2, after_rightx + beforeAfterSpacer/2) ax_contrast.set_xlim(ax_raw.get_xlim()) if floatContrast is True: # Set the ticks locations for ax_raw. ax_raw.get_xaxis().set_ticks((0, xposAfter)) # Make sure they have the same y-limits. ax_contrast.set_ylim(ax_raw.get_ylim()) # Drawing in the x-axis for ax_raw. ## Set the tick labels! ax_raw.set_xticklabels(xlevs, rotation = tickAngle, horizontalalignment = tickAlignment) ## Get lowest y-value for ax_raw. y = ax_raw.get_yaxis().get_view_interval()[0] # Align the left axes and the floating axes. align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]), ax_contrast, 0) # Add label to floating axes. But on ax_raw! ax_raw.text(x = deltaSwarmX, y = ax_raw.get_yaxis().get_view_interval()[0], horizontalalignment = 'left', s = 'Difference', fontsize = 15) # Set reference lines ## zero line ax_contrast.hlines(0, # y-coordinate ax_contrast.xaxis.get_majorticklocs()[0], # x-coordinates, start and end. ax_raw.xaxis.get_view_interval()[1], linestyle = 'solid', linewidth = 0.75, color = 'black') ## effect size line ax_contrast.hlines(summDelta, ax_contrast.xaxis.get_majorticklocs()[1], ax_raw.xaxis.get_view_interval()[1], linestyle = 'solid', linewidth = 0.75, color = 'black') # Align the left axes and the floating axes. align_yaxis(ax_raw, after_stat_summary, ax_contrast, 0.) else: # Set the ticks locations for ax_raw. ax_raw.get_xaxis().set_ticks((0, xposAfter)) fig.add_subplot(ax_raw) fig.add_subplot(ax_contrast) ax_contrast.set_ylim(contrastYlim) # Calculate p-values. # 1-sample t-test to see if the mean of the difference is different from 0. ttestresult = ttest_1samp(plotPoints['delta_y'], popmean = 0)[1] bootsDelta['ttest_pval'] = ttestresult contrastList.append(bootsDelta) contrastListNames.append( str(xlevs[1])+' v.s. 
'+str(xlevs[0]) ) # Turn contrastList into a pandas DataFrame, contrastList = pd.DataFrame(contrastList).T contrastList.columns = contrastListNames # Now we iterate thru the contrast axes to normalize all the ylims. for j,i in enumerate(range(1, len(fig.get_axes()), 2)): axx=fig.get_axes()[i] ## Get max and min of the dataset. lower = np.min(contrastList.ix['stat_array',j]) upper = np.max(contrastList.ix['stat_array',j]) meandiff = contrastList.ix['summary', j] ## Make sure we have zero in the limits. if lower > 0: lower = 0. if upper < 0: upper = 0. ## Get tick distance on raw axes. ## This will be the tick distance for the contrast axes. rawAxesTicks = fig.get_axes()[i-1].yaxis.get_majorticklocs() rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0] ## First re-draw of axis with new tick interval axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist)) newticks1 = fig.get_axes()[i].get_yticks() if floatContrast is False: if (showAllYAxes is False and i in range( 2, len(fig.get_axes())) ): axx.get_yaxis().set_visible(showAllYAxes) else: ## Obtain major ticks that comfortably encompass lower and upper. newticks2 = list() for a,b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. if np.max(newticks2) < meandiff: ind = np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1. newticks2.append( newticks1[ind+1] ) elif meandiff < np.min(newticks2): ind = np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1. newticks2.append( newticks1[ind-1] ) newticks2 = np.array(newticks2) newticks2.sort() axx.yaxis.set_major_locator(FixedLocator(locs = newticks2)) ## Draw zero reference line. 
axx.hlines(y = 0, xmin = fig.get_axes()[i].get_xaxis().get_view_interval()[0], xmax = fig.get_axes()[i].get_xaxis().get_view_interval()[1], linestyle = contrastZeroLineStyle, linewidth = 0.75, color = contrastZeroLineColor) sns.despine(ax = fig.get_axes()[i], trim = True, bottom = False, right = True, left = False, top = True) ## Draw back the lines for the relevant y-axes. drawback_y(axx) ## Draw back the lines for the relevant x-axes. drawback_x(axx) elif floatContrast is True: ## Get the original ticks on the floating y-axis. newticks1 = fig.get_axes()[i].get_yticks() ## Obtain major ticks that comfortably encompass lower and upper. newticks2 = list() for a,b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. if np.max(newticks2) < meandiff: ind = np.where(newticks1 == np.max(newticks2))[0][0] # find out the max tick index in newticks1. newticks2.append( newticks1[ind+1] ) elif meandiff < np.min(newticks2): ind = np.where(newticks1 == np.min(newticks2))[0][0] # find out the min tick index in newticks1. newticks2.append( newticks1[ind-1] ) newticks2 = np.array(newticks2) newticks2.sort() ## Re-draw the axis. axx.yaxis.set_major_locator(FixedLocator(locs = newticks2)) ## Despine and trim the axes. sns.despine(ax = axx, trim = True, bottom = False, right = False, left = True, top = True) for i in range(0, len(fig.get_axes()), 2): # Loop through the raw data swarmplots and despine them appropriately. if floatContrast is True: sns.despine(ax = fig.get_axes()[i], trim = True, right = True) else: sns.despine(ax = fig.get_axes()[i], trim = True, bottom = True, right = True) fig.get_axes()[i].get_xaxis().set_visible(False) # Draw back the lines for the relevant y-axes. 
ymin = fig.get_axes()[i].get_yaxis().get_majorticklocs()[0] ymax = fig.get_axes()[i].get_yaxis().get_majorticklocs()[-1] x, _ = fig.get_axes()[i].get_xaxis().get_view_interval() fig.get_axes()[i].add_artist(Line2D((x, x), (ymin, ymax), color='black', linewidth=1.5)) # Zero gaps between plots on the same row, if floatContrast is False if (floatContrast is False and showAllYAxes is False): gsMain.update(wspace = 0) else: # Tight Layout! gsMain.tight_layout(fig) # And we're done. rcdefaults() # restore matplotlib defaults. sns.set() # restore seaborn defaults. return fig, contrastList
def contrastplot(
    data, x=None, y=None, idx=None, idcol=None,

    alpha=0.75,
    axis_title_size=None,

    ci=95,
    contrastShareY=True,
    contrastEffectSizeLineStyle='solid',
    contrastEffectSizeLineColor='black',
    contrastYlim=None,
    contrastZeroLineStyle='solid',
    contrastZeroLineColor='black',
    connectPairs=True,

    effectSizeYLabel="Effect Size",

    figsize=None,
    floatContrast=True,
    floatSwarmSpacer=0.2,

    heightRatio=(1, 1),

    lineWidth=2,
    legend=True,
    legendFontSize=14,
    legendFontProps={},

    paired=False,
    pairedDeltaLineAlpha=0.3,
    pairedDeltaLineWidth=1.2,
    pal=None,

    rawMarkerSize=8,
    rawMarkerType='o',
    reps=3000,

    showGroupCount=True,
    showCI=False,
    showAllYAxes=False,
    showRawData=True,
    smoothboot=False,
    statfunction=None,

    summaryBar=False,
    summaryBarColor='grey',
    summaryBarAlpha=0.25,

    summaryColour='black',
    summaryLine=True,
    summaryLineStyle='solid',
    summaryLineWidth=0.25,

    summaryMarkerSize=10,
    summaryMarkerType='o',

    swarmShareY=True,
    swarmYlim=None,

    tickAngle=45,
    tickAlignment='right',

    violinOffset=0.375,
    violinWidth=0.2,
    violinColor='k',

    xticksize=None,
    yticksize=None,

    **kwargs):
    '''Produce a contrast plot from a pandas DataFrame.

    Draws either a Cumming hub-and-spoke plot (contrast axes below the raw
    data) or a Gardner-Altman plot (floating contrast axis to the right),
    for paired or unpaired data.

    Data selection:
        data: pandas DataFrame.
        x: name of the column holding the group labels. When None the data
            is treated as 'wide' (one column per group); otherwise as
            'long' (one row per observation).
        y: name of the column holding the values; required when `x` is given.
        idx: tuple of group names (one plot), or tuple of tuples of group
            names (one sub-plot per inner tuple). The first name in each
            tuple is the reference group.
        idcol: column identifying pairs; required for paired 'long' data.
        paired: if True, each tuple in `idx` must have exactly two groups.
            Raw swarms and CI bars are switched off in paired mode.

    Statistics:
        statfunction: summary function, defaults to `np.mean`.
        ci: confidence level in percent, 0-100.
        reps, smoothboot: forwarded to the bootstrap helpers.

    Layout:
        floatContrast: True draws the contrast on a twin y-axis. It is
            forced to False for unpaired plots where any tuple has more
            than two groups.
        floatSwarmSpacer: horizontal gap left between the two swarms of a
            floating plot.
        figsize: figure size; defaults depend on the number of sub-plots.
        swarmYlim, contrastYlim: (low, high) limits of the raw and contrast
            axes. Raw limits default to the rounded data range.
        contrastShareY: normalise contrast y-axes across sub-plots
            (non-floating plots with more than one sub-plot only).
        showAllYAxes: keep the y-axis visible on every sub-plot.
        axis_title_size, xticksize, yticksize: font sizes; default to
            27, 22 and 22.
        tickAngle, tickAlignment: rotation and alignment of x tick labels.
        showGroupCount: append ' n=<count>' to each x tick label.

    Raw-data styling:
        alpha, rawMarkerSize, rawMarkerType: swarm marker appearance.
        pal: palette; defaults to a seaborn palette keyed by group (or by
            the `hue` column if `hue` is passed in `kwargs`).
        showRawData: draw the swarm plot (unpaired only).
        connectPairs, pairedDeltaLineAlpha, pairedDeltaLineWidth: per-pair
            connecting lines in paired mode.
        summaryBar, summaryBarColor, summaryBarAlpha: bar at each group
            summary. Takes precedence over `summaryLine`.
        summaryLine, summaryColour, summaryLineStyle, summaryLineWidth:
            horizontal line at each group summary (off for floating plots).
        showCI: overlay seaborn bar-plot CIs.
        legend, legendFontSize, legendFontProps: legend on the right-most
            raw-data axes.

    Contrast styling:
        violinWidth, violinOffset, violinColor, lineWidth,
        summaryMarkerSize, summaryMarkerType: bootstrap violin/marker.
        contrastZeroLineStyle, contrastZeroLineColor,
        contrastEffectSizeLineStyle, contrastEffectSizeLineColor:
            reference lines.

    `effectSizeYLabel`, `heightRatio` and `swarmShareY` are accepted but not
    used by this function. Remaining `**kwargs` go to `sns.swarmplot`.

    Returns:
        (fig, contrastList): the matplotlib Figure and a DataFrame with one
        column per contrast, holding the bootstrap results.

    Raises:
        TypeError: `data` is not a DataFrame, or `idx` mixes strings and
            tuples (unpaired).
        ValueError: missing/invalid `x`, `y`, `idx`, `idcol`, `hue` or `ci`.

    Side effects: sets the seaborn 'ticks' style and matplotlib rc params
    while drawing, then restores matplotlib and seaborn defaults.
    '''

    # Check that `data` is a pandas dataframe
    if not isinstance(data, pd.DataFrame):
        raise TypeError(
            "The object passed to the command is not a pandas DataFrame. "
            "Please convert it to a pandas DataFrame.")

    # make sure that at least x, y, and idx are specified.
    if x is None and y is None and idx is None:
        raise ValueError('You need to specify `x` and `y`, or `idx`. '
                         'Neither has been specified.')

    if x is None:
        # No x: assume a 'wide' dataset, each idx naming a column.
        datatype = 'wide'
        # Flattening also fails early if `idx` is not a tuple of tuples.
        all_idx = np.unique([element for tupl in idx for element in tupl])
    else:
        # x given: assume a 'long' dataset, one row per datapoint.
        datatype = 'long'
        if y is None:
            raise ValueError("`paired` is false, but no y-column given.")
        # Calculate Ns.
        counts = data.groupby(x)[y].count()

    # Get and set levels of data[x]
    if paired is True:
        violinWidth = 0.1
        if idcol is None and datatype == 'long':
            raise ValueError('`idcol` has not been supplied but a paired '
                             'plot is desired; please specify the `idcol`.')
        if idx is not None:
            # check if multi-plot or not
            if all(isinstance(element, str) for element in idx):
                idx_not_in_cols = [n for n in idx
                                   if n not in data[x].unique()]
                if len(idx_not_in_cols) != 0:
                    raise ValueError(
                        str(idx_not_in_cols)
                        + " cannot be found in the columns of `data`.")
                # a single paired comparison needs exactly two groups.
                if len(idx) != 2:
                    raise ValueError(str(idx) + " does not have length 2.")
                else:
                    tuple_in = (tuple(idx, ),)
                    widthratio = [1]
            elif all(isinstance(element, tuple) for element in idx):
                # a tuple of tuples: we have a multiplot!
                idx_not_in_cols = [n for tup in idx for n in tup
                                   if n not in data[x].unique()]
                if len(idx_not_in_cols) != 0:
                    raise ValueError(
                        str(idx_not_in_cols)
                        + " cannot be found in the column " + x)
                wrong_length = [element for element in idx
                                if len(element) != 2]
                if wrong_length:
                    raise ValueError(
                        str(wrong_length) + " does not have length 2.")
                # One width unit per group in each sub-plot.
                tuple_in = idx
                widthratio = [len(i) for i in tuple_in]
        elif idx is None:
            raise ValueError('Please specify idx.')
        showRawData = False  # Just show lines, do not show data.
        showCI = False  # not yet supported together with sns.barplot.
        if datatype == 'long':
            if idx is None:
                # Unreachable while `idx is None` raises above.
                tuple_in = tuple(np.sort(np.unique(data[x]))[0:2],)
            # NOTE(review): the pivoted frame is computed but never read
            # later in this function.
            data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

    elif paired is False:
        if idx is None:
            widthratio = [1]
            tuple_in = (tuple(data[x].unique()),)
            if len(tuple_in[0]) > 2:
                floatContrast = False
        else:
            if all(isinstance(element, str) for element in idx):
                # single plot: every name must be a level of data[x].
                idx_not_in_x = [n for n in idx if n not in data[x].unique()]
                if len(idx_not_in_x) != 0:
                    raise ValueError(
                        str(idx_not_in_x)
                        + " cannot be found in the column " + x)
                tuple_in = (idx, )
                widthratio = [1]
                if len(idx) > 2:
                    floatContrast = False
            elif all(isinstance(element, tuple) for element in idx):
                # a tuple of tuples: we have a multiplot!
                idx_not_in_x = [n for tup in idx for n in tup
                                if n not in data[x].unique()]
                if len(idx_not_in_x) != 0:
                    raise ValueError(
                        str(idx_not_in_x)
                        + " cannot be found in the column " + x)
                tuple_in = idx
                # A floating axis only makes sense for two groups.
                if any(len(element) > 2 for element in tuple_in):
                    floatContrast = False
                # One width unit per group in each sub-plot.
                widthratio = [len(i) for i in tuple_in]
            else:
                raise TypeError(
                    "The object passed to `idx` consists of a mixture of "
                    "single strings and tuples. Please make sure that "
                    "`idx` is either a tuple of column names, or a tuple "
                    "of tuples, for plotting.")

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar = True
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False
    # initialise statfunction
    if statfunction is None:
        statfunction = np.mean

    # Collect every bootstrap result generated, and its column name.
    contrastList = list()
    contrastListNames = list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colorCol = kwargs['hue']
            if colorCol not in data.columns:
                raise ValueError(str(colorCol) + ' is not a column name.')
            colGrps = data[colorCol].unique()
            plotPal = dict(zip(colGrps,
                               sns.color_palette(n_colors=len(colGrps))))
        else:
            if datatype == 'long':
                colGrps = data[x].unique()
                plotPal = dict(zip(colGrps,
                                   sns.color_palette(n_colors=len(colGrps))))
            if datatype == 'wide':
                plotPal = np.repeat('k', len(data))
    else:
        if datatype == 'long':
            plotPal = pal
        if datatype == 'wide':
            # Map each row's hue level through `pal` when a hue column was
            # requested; otherwise use the palette as given.
            hue_col = kwargs.get('hue')
            if hue_col is None:
                plotPal = pal
            else:
                plotPal = [pal[level] for level in data[hue_col]]

    if swarmYlim is None:
        # get range of _selected groups_.
        u = np.unique([element for tupl in tuple_in for element in tupl])
        if datatype == 'long':
            tempdat = data[data[x].isin(u)]
            swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
        if datatype == 'wide':
            allMin = [np.min(data[col]) for col in u]
            allMax = [np.max(data[col]) for col in u]
            swarm_ylim = np.array([np.min(allMin), np.max(allMax)])
        swarm_ylim = np.round(swarm_ylim)
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if summaryBar is True:
        lims = swarm_ylim
        # Bars grow from zero, so extend the limits to include zero.
        if 0 not in range(int(round(lims[0])), int(round(lims[1]))):
            # check if all negative.
            if lims[0] < 0. and lims[1] < 0.:
                swarm_ylim = np.array([np.min(lims), 0.])
            # check if all positive.
            elif lims[0] > 0. and lims[1] > 0.:
                swarm_ylim = np.array([0., np.max(lims)])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    # plot params
    if axis_title_size is None:
        axis_title_size = 27
    if yticksize is None:
        yticksize = 22
    if xticksize is None:
        xticksize = 22

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}
    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # calculate CI.
    if ci < 0 or ci > 100:
        raise ValueError('ci should be between 0 and 100.')
    alpha_level = (100. - ci) / 100.

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # 1 row; one column per tuple in `tuple_in`. len() also works when the
    # inner tuples have different lengths.
    gsMain = gridspec.GridSpec(
        1, len(tuple_in),
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        if datatype == 'long':
            # Copy so that the column assignment below does not write into
            # a view of the caller's frame.
            plotdat = data[data[x].isin(current_tuple)].copy()
            plotdat[x] = plotdat[x].astype("category").cat.set_categories(
                current_tuple, ordered=True)
            # NOTE(review): the sorted frame is discarded, so this call has
            # no effect on `plotdat`.
            plotdat.sort_values(by=[x])
            summaries = plotdat.groupby(x)[y].apply(statfunction)
        if datatype == 'wide':
            plotdat = data[list(current_tuple)]
            summaries = statfunction(plotdat)
            plotdat = pd.melt(plotdat)  # wide data is melted from here on.

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot.
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig, subGridSpec[1, 0],
                                      sharex=ax_raw, frame_on=False)

        # Calculate the bootstrapped contrast
        bscontrast = list()
        if paired is False:
            # only select the columns used for x and y plotting.
            tempplotdat = plotdat[[x, y]]
            # Start from 1: the reference group is not compared to itself.
            for i in range(1, len(current_tuple)):
                tempbs = bootstrap_contrast(
                    data=tempplotdat.dropna(),
                    x=x, y=y,
                    idx=[current_tuple[0], current_tuple[i]],
                    statfunction=statfunction,
                    smoothboot=smoothboot,
                    alpha_level=alpha_level,
                    reps=reps)
                bscontrast.append(tempbs)
                contrastList.append(tempbs)
                contrastListNames.append(
                    str(current_tuple[i]) + ' vs. ' + str(current_tuple[0]))

        #### PLOT RAW DATA.
        # Seaborn swarmplot doc says to set custom ylims first.
        ax_raw.set_ylim(swarm_ylim)

        if paired is False and showRawData is True:
            sw = sns.swarmplot(
                data=plotdat, x=x, y=y,
                order=current_tuple,
                ax=ax_raw,
                alpha=alpha,
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

            if floatContrast:
                # Get horizontal offset values.
                maxXBefore = max(sw.collections[0].get_offsets().T[0])
                minXAfter = min(sw.collections[1].get_offsets().T[0])
                xposAfter = maxXBefore + floatSwarmSpacer
                xAfterShift = minXAfter - xposAfter
                # shift the (second) swarmplot
                offsetSwarmX(sw.collections[1], -xAfterShift)
                # shift the tick.
                ax_raw.set_xticks([0., 1 - xAfterShift])

        elif paired is True:
            if showRawData is True:
                sw = sns.swarmplot(
                    data=plotdat, x=x, y=y,
                    order=current_tuple,
                    ax=ax_raw,
                    alpha=alpha,
                    palette=plotPal,
                    size=rawMarkerSize,
                    marker=rawMarkerType,
                    **kwargs)
            if connectPairs is True:
                # Produce paired plot with lines.
                before = plotdat[plotdat[x] == current_tuple[0]][y].tolist()
                after = plotdat[plotdat[x] == current_tuple[1]][y].tolist()
                linedf = pd.DataFrame({'before': before, 'after': after})
                # One plot call per pair so each line can carry its colour.
                for ii in range(0, len(linedf)):
                    ax_raw.plot(
                        [0, 0.25],
                        [linedf.loc[ii, 'before'], linedf.loc[ii, 'after']],
                        linestyle='solid',
                        linewidth=pairedDeltaLineWidth,
                        color=plotPal[current_tuple[0]],
                        alpha=pairedDeltaLineAlpha,
                    )
                ax_raw.set_xlim(-0.25, 0.5)
                ax_raw.set_xticks([0, 0.25])
                ax_raw.set_xticklabels([current_tuple[0], current_tuple[1]])

        if summaryBar is True:
            if paired is False:
                bar_raw = sns.barplot(
                    x=summaries.index.tolist(),
                    y=summaries.values,
                    facecolor=summaryBarColor,
                    ax=ax_raw,
                    alpha=summaryBarAlpha)
                if floatContrast is True:
                    maxSwarmSpan = 2 / 10.
                    xlocs = list()
                    for i, bar in enumerate(bar_raw.patches):
                        x_width = bar.get_x()
                        width = bar.get_width()
                        centre = x_width + (width / 2.)
                        if i == 0:
                            bar.set_x(centre - maxSwarmSpan / 2.)
                            xlocs.append(centre)
                        else:
                            # Follow the shifted second swarm.
                            bar.set_x(centre - xAfterShift
                                      - maxSwarmSpan / 2.)
                            xlocs.append(centre - xAfterShift)
                        bar.set_width(maxSwarmSpan)
                    # make sure xticklocs match the barplot.
                    ax_raw.set_xticks(xlocs)
                elif floatContrast is False:
                    maxSwarmSpan = 4 / 10.
                    xpos = ax_raw.xaxis.get_majorticklocs()
                    for i, bar in enumerate(bar_raw.patches):
                        bar.set_x(xpos[i] - maxSwarmSpan / 2.)
                        bar.set_width(maxSwarmSpan)
            else:
                # if paired is true
                ax_raw.bar(
                    [0, 0.25],
                    [statfunction(plotdat[current_tuple[0]]),
                     statfunction(plotdat[current_tuple[1]])],
                    color=summaryBarColor,
                    alpha=0.5,
                    width=0.05)
                ## Draw zero reference line.
                ax_raw.add_artist(Line2D(
                    (ax_raw.xaxis.get_view_interval()[0],
                     ax_raw.xaxis.get_view_interval()[1]),
                    (0, 0),
                    color='k', linewidth=1.25))

        if summaryLine is True:
            if paired is True:
                xdelta = 0
            else:
                xdelta = summaryLineWidth
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - xdelta, i + xdelta),  # x-coordinates
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if showCI is True:
            sns.barplot(
                data=plotdat, x=x, y=y,
                ax=ax_raw,
                alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            if paired is False:
                # Plot the CIs on the contrast axes.
                plotbootstrap(
                    sw.collections[1],
                    bslist=tempbs,
                    ax=ax_contrast,
                    violinWidth=violinWidth,
                    violinOffset=violinOffset,
                    markersize=summaryMarkerSize,
                    marker=summaryMarkerType,
                    offset=floatContrast,
                    color=violinColor,
                    linewidth=1)
            else:
                bootsDelta = bootstrap(
                    plotdat[current_tuple[1]] - plotdat[current_tuple[0]],
                    statfunction=statfunction,
                    smoothboot=smoothboot,
                    alpha_level=alpha_level,
                    reps=reps)
                contrastList.append(bootsDelta)
                contrastListNames.append(
                    str(current_tuple[1]) + ' vs. ' + str(current_tuple[0]))
                summDelta = bootsDelta['summary']
                lowDelta = bootsDelta['bca_ci_low']
                highDelta = bootsDelta['bca_ci_high']

                if floatContrast:
                    xpos = 0.375
                else:
                    xpos = 0.25

                # Plot the summary measure.
                ax_contrast.plot(
                    xpos, summDelta,
                    marker=summaryMarkerType,
                    markerfacecolor='k',
                    markersize=summaryMarkerSize,
                    alpha=0.75)
                # Plot the CI.
                ax_contrast.plot(
                    [xpos, xpos], [lowDelta, highDelta],
                    color='k',
                    alpha=0.75,
                    linestyle='solid')
                # Plot the violin-plot.
                v = ax_contrast.violinplot(
                    bootsDelta['stat_array'], [xpos],
                    widths=violinWidth,
                    showextrema=False,
                    showmeans=False)
                halfviolin(v, half='right', color='k')

            if floatContrast:
                # Set reference lines
                if paired is False:
                    ## First get leftmost limit of left reference group
                    xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                    leftxlim = xtemp.min()
                    ## Then get leftmost limit of right test group
                    xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                    rightxlim = xtemp.min()
                    ref = tempbs['summary']
                else:
                    leftxlim = 0
                    rightxlim = 0.25
                    ref = bootsDelta['summary']
                    ax_contrast.set_xlim(-0.25, 0.5)

                ## zero line
                ax_contrast.hlines(
                    0,              # y-coordinate
                    leftxlim, 3.5,  # x-coordinates, start and end.
                    linestyle=contrastZeroLineStyle,
                    linewidth=1,
                    color=contrastZeroLineColor)
                ## effect size line
                ax_contrast.hlines(
                    ref,
                    rightxlim, 3.5,  # x-coordinates, start and end.
                    linestyle=contrastEffectSizeLineStyle,
                    linewidth=1,
                    color=contrastEffectSizeLineColor)

                if paired is False:
                    es = float(tempbs['summary'])
                    refSum = tempbs['statistic_ref']
                else:
                    es = float(bootsDelta['summary'])
                    refSum = statfunction(plotdat[current_tuple[0]])

                # Lower both contrast limits by the magnitude of the effect
                # size. abs() keeps the limits defined when the effect size
                # is exactly zero.
                shift = abs(es)
                raw_low, raw_high = ax_raw.get_ylim()
                ax_contrast.set_ylim(raw_low - shift, raw_high - shift)

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')
                # Put the contrast zero level with the reference summary.
                align_yaxis(ax_raw, refSum, ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)
                if paired is False:
                    # Set xlims so everything is properly visible!
                    swarm_xbounds = ax_raw.get_xbound()
                    ax_contrast.set_xbound(
                        swarm_xbounds[0] - (summaryLineWidth * 1.1),
                        swarm_xbounds[1] + (summaryLineWidth * 1.1))
                else:
                    ax_contrast.set_xlim(-0.05, 0.25 + violinWidth)

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    def _contrast_cell(row_label, col_pos):
        # Row selected by label, column by position.
        return contrastList.iloc[:, col_pos].loc[row_label]

    # Get number of axes in figure for aesthetic tweaks.
    axesCount = len(fig.get_axes())

    # Axes alternate raw/contrast, so even positions are the raw axes.
    # They own the tick labels for floating and non-floating plots alike.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]
        newticklabs = list()
        for xticklab in axx.xaxis.get_ticklabels():
            t = xticklab.get_text()
            if paired:
                # NOTE(review): this stringifies the whole `counts` object
                # rather than one group's count - confirm intent.
                N = str(counts)
            else:
                N = str(counts.loc[t])
            if showGroupCount:
                newticklabs.append(t + ' n=' + N)
            else:
                newticklabs.append(t)
        axx.set_xticklabels(
            newticklabs,
            rotation=tickAngle,
            horizontalalignment=tickAlignment)

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if i == 0:
            drawback_y(axx)

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(False)
            else:
                # Draw back the lines for the relevant y-axes.
                drawback_y(axx)
        else:
            drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0],
                 axx.xaxis.get_view_interval()[1]),
                (0, 0),
                color='black', linewidth=0.75))

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                axx.legend(loc='upper right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the y-axis line only for a single plot.
                    if axesCount == 2:
                        drawback_y(axx)

            sns.despine(ax=axx,
                        top=True, right=True,
                        left=False, bottom=False,
                        trim=True)
            if j == 0 and axesCount == 2:
                # Draw back x-axis lines connecting ticks.
                drawback_x(axx)
            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        elif floatContrast is True:
            # Get the bootstrapped contrast range.
            if paired is True:
                lower = np.min(_contrast_cell('stat_array', j))
                upper = np.max(_contrast_cell('stat_array', j))
            else:
                lower = np.min(_contrast_cell('diffarray', j))
                upper = np.max(_contrast_cell('diffarray', j))
            meandiff = _contrast_cell('summary', j)

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Keep the major ticks lying within lower and upper.
            newticks2 = [b for b in newticks1 if lower <= b <= upper]
            # If meandiff falls outside the kept ticks, add the next tick
            # in that direction.
            if np.max(newticks2) < meandiff:
                ind = np.where(newticks1 == np.max(newticks2))[0][0]
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                ind = np.where(newticks1 == np.min(newticks2))[0][0]
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx, trim=True,
                        bottom=False, right=False,
                        left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cumming
    # hub-and-spoke plots.
    # NOTE(review): `contrastYlim` is forwarded as given and may be None;
    # confirm normalizeContrastY derives its own limits in that case.
    if (axesCount > 2 and
            contrastShareY is True and
            floatContrast is False):
        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList
import numpy as np
import seaborn.apionly as sns
import matplotlib.pyplot as plt

sns.set(style="whitegrid", context="notebook")
iris2 = sns.load_dataset('iris')


def covariance(X, Y):
    """Return the sample covariance of X and Y (divides by len(X) - 1).

    Pairs are formed with zip, so extra elements of the longer input are
    ignored in the sum.
    """
    x_mean = np.mean(X)
    y_mean = np.mean(Y)
    cross_products = sum((x - x_mean) * (y - y_mean) for x, y in zip(X, Y))
    return cross_products / (len(X) - 1)


# Compare the hand-written covariance against numpy's implementation.
print("My covariance function: {}".format(covariance([1, 3, 4], [1, 0, 2])))
print("Numpy covariance function: {}".format(np.cov([1, 3, 4], [1, 0, 2])))


def correlation(X, Y):
    """Return the correlation of X and Y: covariance over the product of
    the two sample standard deviations."""
    # ddof=1 selects the sample standard deviation, matching the
    # len(X) - 1 denominator used by covariance().
    spread_x = np.std(X, ddof=1)
    spread_y = np.std(Y, ddof=1)
    return covariance(X, Y) / (spread_x * spread_y)


print("My Correlation: {}".format(correlation([1, 1, 4, 3], [1, 0, 2, 2])))
print("Numpy corrcoef: {}".format(np.corrcoef([1, 1, 4, 3], [1, 0, 2, 2])))
# Exploratory script: load the Boston housing data, inspect it, plot
# histograms and a correlation heat map, then make a train/test split.
# The bare `.head()`, `.info()`, `.describe()` and `corr_matrix['MEDV']`
# expressions only display output in an interactive session.

# Features and target are wrapped in separate DataFrames.
boston = datasets.load_boston()
dat = pd.DataFrame(boston.data, columns=boston.feature_names)
dat.head()
target = pd.DataFrame(boston.target, columns=["MEDV"])
target.head()

# Combined frame: all feature columns plus the MEDV target column.
df = dat.copy()
df = pd.concat([df, target], axis=1)
df.head()
df.info()
df.describe()

# NOTE(review): `snsapi` and `sns` are both used below; presumably two
# aliases of seaborn imported elsewhere in the file - confirm.
snsapi.set()
df.hist(bins = 10, figsize = (15,10)); plt.show();

# Pairwise correlations; the MEDV column is each variable's correlation
# with the target.
corr_matrix = df.corr()
corr_matrix['MEDV']
sns.heatmap(corr_matrix); plt.show()
print(boston['DESCR'])

# Feature subset used for modelling (the listed columns of df).
dat1 = df.loc[:, ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']]

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(dat1, target, test_size = 0.2, random_state=42)
# Flatten the single-column training target to a 1-D array.
y_train = y_train.values.ravel()
def _title_rank(cluster, types, title):
    """Return the 0-based rank of `title` when `types` are ordered by
    ascending `cluster` value, or -1 if `title` is not among `types`."""
    order = np.array(cluster).argsort()
    for rank, type_pos in enumerate(order):
        if types[type_pos] == title:
            return rank
    return -1


def _credit_hit(accuracy_at_k, n_labels, rank):
    """Add one hit to every accuracy@k slot from `rank` up to the last label.

    A negative rank means the true label was not found and credits nothing.
    """
    if rank < 0:
        return
    for pos in range(rank, n_labels):
        accuracy_at_k[pos] += 1


def run_predict_structure(generator=None, title=None):
    """Plot accuracy@k of `predict_structure` on generated random graphs.

    With both `generator` and `title` given, 100 graphs are drawn from
    `generator` and accuracy@k is plotted for the true label `title`.

    With `generator` None, 100 graphs are drawn from each of five built-in
    generators; the pooled accuracy@k and a confusion matrix are plotted.

    With `generator` given but `title` None, nothing is done.

    The label with the smallest `cluster` value is taken as the prediction.
    Progress is printed to stdout and the figures are shown with
    `plt.show()`. Returns None.
    """
    constraints = {'edge_count': (1000, 1100)}
    accuracy_at_k = [0] * 5
    top_k = list(range(1, 6))

    if generator is not None and title is not None:
        samples = 100
        for sample in range(samples):
            G = structural_identities.constrained_generation(
                generator, constraints)
            cluster, types = predict_structure(G, trials=20)
            # Single-argument print() gives the same output on Python 2 and 3.
            print("%s %s" % (sample, types[cluster.index(min(cluster))]))
            _credit_hit(accuracy_at_k, len(cluster),
                        _title_rank(cluster, types, title))

        plt.figure(1)
        accuracy_at_k = [hits / (samples * 1.0) for hits in accuracy_at_k]
        plt.plot(top_k, accuracy_at_k, marker='o')
        plt.xlabel('k (top k labels)')
        plt.ylim((0, 1.1))
        plt.ylabel('Accuracy @ k')
        plt.title('Prediction Accuracy for ' + title + ' Random Graphs')
        plt.show()

    # Uniformly sample across rg
    elif generator is None:
        confusion_matrix = [[0 for _ in range(5)] for _ in range(5)]
        samples = 100
        index = [
            'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
            'Planted Partition Model'
        ]
        constraints_enforced = True
        rgs = [
            structural_identities.watts_strogatz_generator,
            structural_identities.geometric_generator,
            structural_identities.erdos_renyi_generator,
            structural_identities.barabasi_albert_generator,
            structural_identities.planted_partition_generator
        ]
        for actual, rg in enumerate(rgs):
            true_title = index[actual]
            for _ in range(samples):
                G = structural_identities.constrained_generation(
                    rg, constraints)
                cluster, types = predict_structure(G, 5, constraints_enforced)
                # NOTE(review): `predicted` is a position in `types`, while
                # the matrix rows follow `index`; this presumes both lists
                # share the same order - confirm.
                predicted = cluster.index(min(cluster))
                print("%s %s" % (true_title, types[predicted]))
                confusion_matrix[actual][predicted] += 1
                _credit_hit(accuracy_at_k, len(cluster),
                            _title_rank(cluster, types, true_title))

        small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']
        accuracy_at_k = [hits / (samples * 1.0 * len(rgs))
                         for hits in accuracy_at_k]
        print(accuracy_at_k)
        if constraints_enforced:
            plt.plot(top_k, accuracy_at_k, marker='o', color='red')
        else:
            plt.plot(top_k, accuracy_at_k, marker='o')
        plt.xlabel('k (top k labels)')
        plt.ylim((0, 1.1))
        plt.ylabel('Accuracy @ k')
        plt.title('Prediction Accuracy for Uniformly Sampled Random Graphs')
        plt.show()

        sns.set()
        ax = plt.axes()
        sns.heatmap(confusion_matrix,
                    ax=ax,
                    cmap="YlGnBu",
                    yticklabels=index,
                    xticklabels=small_index)
        ax.set_title('Confusion Matrix for Uniformly Sampled Random Graphs')
        plt.tight_layout()
        plt.show()
# Plot how many tweets fall in each sentiment class, split by input file.
path = os.getcwd()

# Sort the matches so the month label assigned to each file is reproducible;
# glob returns files in arbitrary directory order.  Note the sort is
# lexicographic on the file name.
allFiles = sorted(glob.glob(os.path.join(path, "bitcoinTweets_*.txt")))
if not allFiles:
    raise ValueError("No bitcoinTweets_*.txt files found in " + path)

list_ = []
for i, file_ in enumerate(allFiles):
    df = pd.read_csv(file_, sep='::', engine='python', header=None)
    # Tag every row with a per-file label (16 for the first file, 17 for the
    # second, ...).
    df['month'] = i + 16
    list_.append(df)

frame = pd.concat(list_)
# The appended per-file label is the last column and is renamed 'date' here.
# NOTE(review): this assumes each file holds exactly two '::'-separated
# fields (tweet text and sentiment) - confirm against the data.
frame.columns = ['tweets', 'sentiment', 'date']
frame['sentiment'] = frame['sentiment'].astype('category')

sns.set()
plt.figure()
sns.countplot(x='sentiment', hue='date', data=frame, palette="Greens_d")
plt.show()
# Apply seaborn's default plot styling as soon as this package is imported.
# The function is bound to a private alias so the builtin `set` is neither
# shadowed in this namespace nor re-exported through star-imports, and it is
# imported from the top-level seaborn package because `seaborn.apionly` no
# longer exists in current seaborn releases.
from seaborn import set as _apply_seaborn_defaults

_apply_seaborn_defaults()

from .grids import *
from .miscplots import *

__version__ = "0.0.0"
def plottingData():
    """Plot columns of a tab-separated data file chosen on the command line.

    Reads the module-level ``args`` object (presumably an argparse namespace -
    verify against the caller) and uses three of its attributes:

    * ``args.data``   - file name, resolved against the current directory.
    * ``args.option`` - one column name, or two names separated by a comma.
    * ``args.pdf``    - falsy, or ``'kde'``.

    Behaviour:

    * two columns and ``args.pdf`` falsy: scatter plot coloured by a Gaussian
      kernel density estimate of the points;
    * one column: line plot of that column against the ``'#Frame'`` column;
    * two columns and ``args.pdf == 'kde'``: seaborn joint KDE plot.

    Any other combination produces no plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Only `sep` is passed: supplying both `sep` and `delimiter` is rejected
    # by current pandas.
    df = pd.read_csv(os.path.join(os.getcwd(), args.data), sep='\t', header=0)
    two_variables = ',' in args.option

    if two_variables and not args.pdf:
        # Split the option and drop surrounding double quotes.  str.strip
        # returns a new string, so the result has to be kept.
        numVar = [name.strip('"') for name in args.option.split(',')]

        fig, (ax1) = plt.subplots(nrows=1)

        # Color by the Probability Density Function.
        # Kernel density estimation is a way to estimate the probability
        # density function (PDF) of a random variable in a non-parametric way.
        # Plain arrays are used so the reordering below is positional
        # regardless of the DataFrame index.
        x = df[numVar[0]].values
        y = df[numVar[1]].values

        # Calculate the point density
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)

        # Sort the points by density, so that the densest points are plotted
        # last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]

        # 'none' is the valid spelling for "no marker edge".
        pdf = ax1.scatter(x, y, c=z, s=50, edgecolor='none')

        ax1.set_title(numVar[0] + ' by ' + numVar[1])

        # Hide right and top spines
        ax1.spines['right'].set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.yaxis.set_ticks_position('left')
        ax1.xaxis.set_ticks_position('bottom')

        # Pad the data range by one unit on every side.
        plt.xlim(df[numVar[0]].min() - 1, df[numVar[0]].max() + 1)
        plt.ylim(df[numVar[1]].min() - 1, df[numVar[1]].max() + 1)

        plt.xlabel(numVar[0])
        plt.ylabel(numVar[1])

        colbar = plt.colorbar(pdf)
        colbar.set_label('Probability Density Function')
        plt.show()

    elif not two_variables:
        fig, (ax1) = plt.subplots(nrows=1)
        ax1.plot(df['#Frame'], df[args.option])
        ax1.set_title(args.option + ' by Time')
        ax1.spines['right'].set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.yaxis.set_ticks_position('left')
        ax1.xaxis.set_ticks_position('bottom')
        plt.xlabel('Time (ps)')
        plt.xlim(df['#Frame'].min() - 1, df['#Frame'].max() + 1)
        plt.ylabel(args.option)
        plt.show()

    elif args.pdf == 'kde':
        # `seaborn.apionly` has been removed from seaborn; the explicit
        # `sns.set` call below determines the style either way.
        import seaborn as sns
        sns.set(style='white')

        numVar = [name.strip('"') for name in args.option.split(',')]

        # Distribution plot of two variables using KDE method with seaborn
        sns.jointplot(x=numVar[0],
                      y=numVar[1],
                      data=df,
                      kind="kde",
                      space=0,
                      color="b")
        plt.show()
def test_edge_imputation():
    """Evaluate ``predict_structure`` on graphs rebuilt by edge imputation.

    For each of the five random-graph generators, 100 constrained graphs are
    generated.  Each is paired with a configuration-model graph built from an
    all-ones degree sequence of the same node count, which is then passed,
    together with the generated graph, through ``impute_edge_algorithm``.
    The imputed graph is classified with ``predict_structure`` and the
    result is accumulated into a confusion matrix (rows = actual, columns =
    predicted) and a pooled accuracy@k curve.  After each generator the
    metric distributions of the imputed graphs are plotted through
    ``structural_identities``.

    Returns
    -------
    None
        Results are printed and shown with matplotlib.
    """
    constraints = {'edge_count': (1000, 1100)}
    accuracy_at_k = [0] * 5
    confusion_matrix = [[0 for _ in range(5)] for _ in range(5)]
    samples = 100
    index = [
        'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
        'Planted Partition Model'
    ]
    constraints_enforced = False
    rgs = [
        structural_identities.watts_strogatz_generator,
        structural_identities.geometric_generator,
        structural_identities.erdos_renyi_generator,
        structural_identities.barabasi_albert_generator,
        structural_identities.planted_partition_generator
    ]

    for uni, rg in enumerate(rgs):
        title = index[uni]
        actual = uni
        created_graphs = []

        for _ in range(samples):
            G = structural_identities.constrained_generation(rg, constraints)
            degree_sequence = [1] * G.number_of_nodes()
            new_G = random_graphs.configuration_model(degree_sequence)
            new_G = impute_edge_algorithm(new_G, G)
            created_graphs.append(new_G)

            cluster, types = predict_structure(new_G, 2, constraints_enforced)
            predicted = cluster.index(min(cluster))
            print('%s %s' % (title, types[predicted]))
            confusion_matrix[actual][predicted] += 1

            # Positions of `cluster` ordered from smallest to largest score;
            # the smallest score is the top prediction.
            order = np.argsort(cluster)
            hit_rank = next(
                (rank for rank, pos in enumerate(order)
                 if types[pos] == title),
                None)
            # Only credit k >= rank of the true label.  If the label is not
            # among `types`, credit nothing (the old -1 sentinel credited
            # every k and double counted the last slot via index -1).
            if hit_rank is not None:
                for slot in range(hit_rank, len(cluster)):
                    accuracy_at_k[slot] += 1

        # HERE we plot distros
        observed_metrics, dic = (
            structural_identities.analyze_structural_identity_graphs(
                created_graphs, uni))
        predict_metrics, dic = structural_identities.analyze_structural_identity(
            rg, samples, uni)
        structural_identities.graph_created_distributions(
            uni, observed_metrics, predict_metrics, dic)

    small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']

    plt.figure(10)
    accuracy_at_k = [
        count / (samples * 1.0 * len(rgs)) for count in accuracy_at_k
    ]
    # x positions 1..k for the accuracy@k curve.
    top_k = list(range(1, len(accuracy_at_k) + 1))
    if constraints_enforced:
        plt.plot(top_k, accuracy_at_k, marker='o', color='red')
    else:
        plt.plot(top_k, accuracy_at_k, marker='o')
    plt.xlabel('k (top k labels)')
    plt.ylim((0, 1.1))
    plt.ylabel('Accuracy @ k')
    plt.title('Prediction Accuracy for Uniformly Sampled Random Graphs')
    plt.show()

    sns.set()
    ax = plt.axes()
    sns.heatmap(confusion_matrix,
                ax=ax,
                cmap="YlGnBu",
                yticklabels=index,
                xticklabels=small_index)
    ax.set_title('Confusion Matrix for Uniformly Sampled Random Graphs')
    plt.tight_layout()
    plt.show()
def plot_scree(clf_pca,
               xlim=[-1, 10],
               ylim=[-0.1, 1.0],
               required_var=0.90,
               figsize=(10, 5)):
    """Create side-by-side scree plots for analyzing variance of principal
    components from PCA.

    The left panel shows the proportion of variance explained by each
    component, the right panel the cumulative proportion.  Both are plotted
    against 1-based component numbers so that the required-variance marker
    passes through the point at which the threshold is first reached.

    Parameters
    ----------
    clf_pca : sklearn.decomposition.PCA
        A fitted scikit-learn PCA model.
    xlim : list
        X-axis range. If `required_var` is supplied, the maximum x-axis value
        will automatically be set so that the required variance line is
        visible on the plot. Defaults to [-1, 10].
    ylim : list
        Y-axis range. Defaults to [-0.1, 1.0].
    required_var : float, int, None
        A value of variance to distinguish on the scree plot. Set to None to
        not include on the plot. If the fitted components never reach this
        value, no marker is drawn. Defaults to 0.90.
    figsize : tuple
        A tuple indicating the size of the plot to be created, with format
        (x-axis, y-axis). Defaults to (10, 5).

    Returns
    -------
    matplotlib.figure.Figure
        The Figure instance.

    Raises
    ------
    AssertionError
        If `clf_pca` is not a ``sklearn.decomposition.PCA`` instance.
    """
    # Ensure we have a PCA model
    assert isinstance(clf_pca, decomposition.PCA), (
        "Models of type {0} are not supported. Only models of type "
        "sklearn.decomposition.PCA are supported.".format(type(clf_pca)))

    # Extract variances from the model
    variances = clf_pca.explained_variance_ratio_
    cumsum = np.cumsum(variances)  # Cumulative sum of variances explained
    component_numbers = np.arange(1, len(variances) + 1)

    # Set plot style and scale up font size in a single call: a separate
    # `sns.set(font_scale=...)` would reset the style back to its default.
    sns.set(style="whitegrid", font_scale=1.2)

    try:
        fig = plt.figure('scree', figsize=figsize)

        # First plot: variance explained per component
        plt.subplot(1, 2, 1)
        plt.xlabel("Component Number")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(component_numbers, variances, marker='o', linestyle='--')

        # Second plot: cumulative variance explained
        plt.subplot(1, 2, 2)
        plt.xlabel("Number of Components")
        plt.ylabel("Proportion of Variance Explained")
        plt.xlim(xlim)
        plt.ylim(ylim)
        plt.plot(component_numbers, cumsum, marker='o', linestyle='--')

        # Add marker for required variance line.  `np.argmax` returns 0 for
        # an all-False mask, so check that the threshold is reached at all.
        if required_var is not None:
            reached = cumsum >= required_var
            if reached.any():
                required_var_components = int(np.argmax(reached)) + 1

                # Update xlim if it is too small to see the marker
                if xlim[1] <= required_var_components:
                    plt.xlim([xlim[0], required_var_components + 1])

                # Add the marker and legend to the plot
                plt.axvline(x=required_var_components,
                            c='r',
                            linestyle='dashed',
                            label="> {0:.0f}% Var. Explained: {1} "
                            "components".format(required_var * 100,
                                                required_var_components))
                legend = plt.legend(loc='lower right', frameon=True)
                legend.get_frame().set_facecolor('#FFFFFF')

        plt.show()
    finally:
        # Always restore the original matplotlib settings, even on error.
        sns.reset_orig()

    return fig
def _paired_contrast_ticks(candidate_ticks, lower, upper, meandiff):
    """Return the candidate ticks inside [lower, upper], extended by one tick
    if needed so that `meandiff` is covered, as a sorted array."""
    candidate_ticks = np.asarray(candidate_ticks)
    chosen = [tick for tick in candidate_ticks if lower <= tick <= upper]
    # If meandiff falls outside the chosen ticks, add the neighbouring
    # candidate tick in the right direction.
    if np.max(chosen) < meandiff:
        ind = np.where(candidate_ticks == np.max(chosen))[0][0]
        chosen.append(candidate_ticks[ind + 1])
    elif meandiff < np.min(chosen):
        ind = np.where(candidate_ticks == np.min(chosen))[0][0]
        chosen.append(candidate_ticks[ind - 1])
    chosen = np.array(chosen)
    chosen.sort()
    return chosen


def pairedcontrast(data,
                   x,
                   y,
                   idcol,
                   reps=3000,
                   statfunction=None,
                   idx=None,
                   figsize=None,
                   beforeAfterSpacer=0.01,
                   violinWidth=0.005,
                   floatOffset=0.05,
                   showRawData=False,
                   showAllYAxes=False,
                   floatContrast=True,
                   smoothboot=False,
                   floatViolinOffset=None,
                   showConnections=True,
                   summaryBar=False,
                   contrastYlim=None,
                   swarmYlim=None,
                   barWidth=0.005,
                   rawMarkerSize=8,
                   rawMarkerType='o',
                   summaryMarkerSize=10,
                   summaryMarkerType='o',
                   summaryBarColor='grey',
                   meansSummaryLineStyle='solid',
                   contrastZeroLineStyle='solid',
                   contrastEffectSizeLineStyle='solid',
                   contrastZeroLineColor='black',
                   contrastEffectSizeLineColor='black',
                   pal=None,
                   legendLoc=2,
                   legendFontSize=12,
                   legendMarkerScale=1,
                   axis_title_size=None,
                   yticksize=None,
                   xticksize=None,
                   tickAngle=45,
                   tickAlignment='right',
                   **kwargs):
    """Plot paired (before/after) data with a bootstrapped difference.

    For every pair of levels in `idx`, the raw values are drawn (as a swarm
    plot, or as hidden strip-plot points joined by lines), each 'before'
    point is optionally connected to its 'after' point, and the bootstrap
    distribution of the paired differences is drawn as a half violin with
    its summary value and confidence interval.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format data; rows containing NaN are dropped.
    x, y : str
        Column holding the group labels / the values.
    idcol : str
        Column identifying which observations belong together.
    reps : int
        Number of bootstrap resamples handed to ``bootstrap``.
    statfunction : callable or None
        Summary statistic; defaults to ``np.mean``.
    idx : tuple or None
        Either two level names, or a tuple of 2-tuples of level names (one
        subplot per 2-tuple).  If None, the first two levels of ``data[x]``
        in sorted order are used.
    figsize : tuple or None
        Figure size; defaults to (6, 6), or (12, 12/sqrt(2)) for more than
        two pairs.
    showRawData : bool
        Draw the raw points as a swarm plot; otherwise the points are hidden
        and only the connecting lines are visible.
    floatContrast : bool
        Draw the difference on a twin axis beside the raw data (True) or in a
        separate axes below it (False).
    showConnections, summaryBar, showAllYAxes : bool
        Toggle the pair-connecting lines, the summary bars and the y-axes of
        every contrast panel after the first.
    contrastYlim, swarmYlim : sequence of 2 floats or None
        Optional y-limits for the contrast and raw-data axes.
    pal : dict or None
        Mapping of level to colour; built from the seaborn palette if None.
    **kwargs
        Passed to ``sns.swarmplot`` / ``sns.stripplot``.  ``hue`` is also
        used to pick the palette levels and to add a legend.

    The remaining keyword parameters tune sizes, offsets, tick and legend
    appearance.  `floatOffset`, `summaryMarkerType`, `meansSummaryLineStyle`,
    `contrastEffectSizeLineStyle` and `contrastEffectSizeLineColor` are
    accepted but not read by this function.

    Returns
    -------
    (matplotlib.figure.Figure, pandas.DataFrame)
        The figure, and a DataFrame with one column per contrast (named
        '<after> v.s. <before>') whose rows are the entries returned by
        ``bootstrap`` plus 'ttest_pval'.

    Raises
    ------
    ValueError
        If `idx`, or any tuple in `idx`, does not contain exactly two levels.

    Notes
    -----
    The function changes matplotlib rc parameters while plotting and calls
    ``rcdefaults()`` followed by ``sns.set()`` before returning.
    """
    # Preliminaries.
    data = data.dropna()

    # plot params
    if axis_title_size is None:
        axis_title_size = 15
    if yticksize is None:
        yticksize = 12
    if xticksize is None:
        xticksize = 12

    rc('axes', **{'labelsize': axis_title_size})
    rc('xtick', **{'labelsize': xticksize})
    rc('ytick', **{'labelsize': yticksize})

    if idx is None:
        # If `idx` is not specified, just take the FIRST TWO levels
        # alphabetically.  It must be wrapped as a tuple of tuples, exactly
        # like a user-supplied single pair, because the code below iterates
        # over pairs.
        idx = (tuple(np.unique(data[x])[0:2]), )
    elif all(isinstance(element, str) for element in idx):
        # idx is supplied but not a multiplot (ie single list or tuple)
        if len(idx) != 2:
            raise ValueError(str(idx) + " does not have length 2.")
        idx = (tuple(idx), )
    elif all(isinstance(element, tuple) for element in idx):
        # idx is a list/tuple of tuples: we have a multiplot!
        wrong_length = [element for element in idx if len(element) != 2]
        if wrong_length:
            raise ValueError(str(wrong_length) + " does not have length 2.")

    if floatViolinOffset is None:
        floatViolinOffset = beforeAfterSpacer / 2
    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])
    if swarmYlim is not None:
        swarmYlim = np.array([swarmYlim[0], swarmYlim[1]])

    # The palette is defined on all levels of the 'x' (or hue) column, so
    # that if the same DataFrame is re-used across different plots the
    # colour identity of each group is maintained.
    palette_column = kwargs['hue'] if 'hue' in kwargs else x
    if 'color' not in kwargs and 'hue' not in kwargs:
        kwargs['color'] = 'k'
    if pal is None:
        palette_levels = data[palette_column].unique()
        pal = dict(
            zip(palette_levels,
                sns.color_palette(n_colors=len(palette_levels))))

    # Initialise figure.
    if figsize is None:
        if len(idx) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (6, 6)
    fig = plt.figure(figsize=figsize)

    # 1 row; columns based on number of tuples in `idx`.
    gsMain = gridspec.GridSpec(1, np.shape(idx)[0])

    # Set default statfunction
    if statfunction is None:
        statfunction = np.mean

    # Collect all the contrast results generated.
    contrastList = list()
    contrastListNames = list()

    for gsIdx, xlevs in enumerate(idx):
        # Pivot to get one row per id with before and after columns.
        data_pivot = data.pivot_table(index=idcol, columns=x, values=y)

        # Start plotting!!
        if floatContrast is True:
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            gsSubGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx])
            ax_raw = plt.Subplot(fig, gsSubGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig,
                                      gsSubGridSpec[1, 0],
                                      sharex=ax_raw,
                                      frame_on=False)

        # Plot raw data as swarmplot or stripplot.
        if showRawData is True:
            swarm_raw = sns.swarmplot(data=data,
                                      x=x,
                                      y=y,
                                      order=xlevs,
                                      ax=ax_raw,
                                      palette=pal,
                                      size=rawMarkerSize,
                                      marker=rawMarkerType,
                                      **kwargs)
        else:
            swarm_raw = sns.stripplot(data=data,
                                      x=x,
                                      y=y,
                                      order=xlevs,
                                      ax=ax_raw,
                                      palette=pal,
                                      **kwargs)
        swarm_raw.set_ylim(swarmYlim)

        # Get some details about the raw data.
        before_points = swarm_raw.collections[0]
        after_points = swarm_raw.collections[1]
        maxXBefore = max(before_points.get_offsets().T[0])
        minXAfter = min(after_points.get_offsets().T[0])
        if showRawData is True:
            beforeAfterSpacer = 1
        xposAfter = maxXBefore + beforeAfterSpacer
        xAfterShift = minXAfter - xposAfter

        # Shift the after swarmpoints closer for aesthetic purposes.
        offsetSwarmX(after_points, -xAfterShift)

        # Convenient signifiers for column names.
        befX = str(xlevs[0] + '_x')
        aftX = str(xlevs[1] + '_x')

        # DataFrame of the 'before' group: plotted position and colour.
        x1 = pd.DataFrame({
            befX: pd.Series(before_points.get_offsets().T[0]),
            xlevs[0]: pd.Series(before_points.get_offsets().T[1]),
            '_R_': pd.Series(before_points.get_facecolors().T[0]),
            '_G_': pd.Series(before_points.get_facecolors().T[1]),
            '_B_': pd.Series(before_points.get_facecolors().T[2]),
        })
        # Join the RGB columns into a tuple, then assign to a column.
        x1['_hue_'] = x1[['_R_', '_G_', '_B_']].apply(tuple, axis=1)
        # Both frames are sorted by value so the plotted points can be
        # labelled with the ids of the equally sorted pivot table.
        x1 = x1.sort_values(by=xlevs[0])
        x1.index = data_pivot.sort_values(by=xlevs[0]).index

        # DataFrame of the 'after' group.
        x2 = pd.DataFrame({
            aftX: pd.Series(after_points.get_offsets().T[0]),
            xlevs[1]: pd.Series(after_points.get_offsets().T[1])
        })
        x2 = x2.sort_values(by=xlevs[1])
        x2.index = data_pivot.sort_values(by=xlevs[1]).index

        # Join x1 and x2, on both their indexes.
        plotPoints = x1.merge(x2,
                              left_index=True,
                              right_index=True,
                              how='outer')

        # Add the hue column if hue argument was passed.
        if 'hue' in kwargs:
            h = kwargs['hue']
            plotPoints[h] = data.pivot(index=idcol, columns=x,
                                       values=h)[xlevs[0]]
            swarm_raw.legend(loc=legendLoc,
                             fontsize=legendFontSize,
                             markerscale=legendMarkerScale)

        # Plot the lines to join the 'before' points to their respective
        # 'after' points.
        if showConnections is True:
            for pair_id in plotPoints.index:
                ax_raw.plot(
                    [plotPoints.loc[pair_id, befX],
                     plotPoints.loc[pair_id, aftX]],
                    [plotPoints.loc[pair_id, xlevs[0]],
                     plotPoints.loc[pair_id, xlevs[1]]],
                    linestyle='solid',
                    color=plotPoints.loc[pair_id, '_hue_'],
                    linewidth=0.75,
                    alpha=0.75)

        # Hide the raw swarmplot data if so desired.
        if showRawData is False:
            before_points.set_visible(False)
            after_points.set_visible(False)

        if showRawData is True:
            maxSwarmSpan = 0.5
        else:
            maxSwarmSpan = barWidth

        # Plot Summary Bar.
        if summaryBar is True:
            # Select the value column before averaging so that non-numeric
            # columns in `data` are never aggregated.
            means = data.groupby(x, sort=True)[y].mean()

            # Draw summary bar.
            bar_raw = sns.barplot(x=means.index,
                                  y=means.values,
                                  order=xlevs,
                                  ax=ax_raw,
                                  ci=0,
                                  facecolor=summaryBarColor,
                                  alpha=0.25)
            # Draw zero reference line.
            ax_raw.add_artist(
                Line2D((ax_raw.xaxis.get_view_interval()[0],
                        ax_raw.xaxis.get_view_interval()[1]), (0, 0),
                       color='black',
                       linewidth=0.75))

            # Re-centre and narrow each bar; the second bar follows the
            # shifted 'after' points.
            for bar_number, bar in enumerate(bar_raw.patches):
                centre = bar.get_x() + bar.get_width() / 2.
                if bar_number == 0:
                    bar.set_x(centre - maxSwarmSpan / 2.)
                else:
                    bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                bar.set_width(maxSwarmSpan)

        # Get the extents of the plotted points.
        beforeRaw = pd.DataFrame(before_points.get_offsets())
        afterRaw = pd.DataFrame(after_points.get_offsets())
        before_leftx = min(beforeRaw[0])
        after_rightx = max(afterRaw[0])
        # Summary of the 'before' values; the zero of the contrast axis is
        # aligned to it below.
        before_stat_summary = statfunction(beforeRaw[1])

        # Calculate the summary difference and CI.
        plotPoints['delta_y'] = plotPoints[xlevs[1]] - plotPoints[xlevs[0]]
        bootsDelta = bootstrap(plotPoints['delta_y'],
                               statfunction=statfunction,
                               smoothboot=smoothboot,
                               reps=reps)
        summDelta = bootsDelta['summary']
        lowDelta = bootsDelta['bca_ci_low']
        highDelta = bootsDelta['bca_ci_high']

        # Set new xpos for delta violin.
        if floatContrast is True:
            if showRawData is False:
                xposPlusViolin = deltaSwarmX = after_rightx + floatViolinOffset
            else:
                xposPlusViolin = deltaSwarmX = after_rightx + maxSwarmSpan
        else:
            xposPlusViolin = xposAfter
        # NOTE(review): applied for both float and non-float layouts; the
        # original comment mentions floatContrast - confirm intended scope.
        if showRawData is True:
            # Set violin width to the bar width.
            violinWidth = maxSwarmSpan

        xmaxPlot = xposPlusViolin + violinWidth

        # Plot the summary measure.
        ax_contrast.plot(xposPlusViolin,
                         summDelta,
                         marker='o',
                         markerfacecolor='k',
                         markersize=summaryMarkerSize,
                         alpha=0.75)
        # Plot the CI.
        ax_contrast.plot([xposPlusViolin, xposPlusViolin],
                         [lowDelta, highDelta],
                         color='k',
                         alpha=0.75,
                         linestyle='solid')
        # Plot the violin-plot.
        v = ax_contrast.violinplot(bootsDelta['stat_array'], [xposPlusViolin],
                                   widths=violinWidth,
                                   showextrema=False,
                                   showmeans=False)
        halfviolin(v, half='right', color='k')

        # Remove left axes x-axis title.
        ax_raw.set_xlabel("")
        # Remove floating axes y-axis title.
        ax_contrast.set_ylabel("")

        # Set proper x-limits
        ax_raw.set_xlim(before_leftx - beforeAfterSpacer / 2, xmaxPlot)
        ax_raw.get_xaxis().set_view_interval(
            before_leftx - beforeAfterSpacer / 2,
            after_rightx + beforeAfterSpacer / 2)
        ax_contrast.set_xlim(ax_raw.get_xlim())

        if floatContrast is True:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            # Make sure they have the same y-limits.
            ax_contrast.set_ylim(ax_raw.get_ylim())

            # Set the tick labels!
            ax_raw.set_xticklabels(xlevs,
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, statfunction(plotPoints[xlevs[0]]),
                        ax_contrast, 0)

            # Add label to floating axes. But on ax_raw!
            ax_raw.text(x=deltaSwarmX,
                        y=ax_raw.get_yaxis().get_view_interval()[0],
                        horizontalalignment='left',
                        s='Difference',
                        fontsize=15)

            # Set reference lines
            # zero line
            ax_contrast.hlines(
                0,  # y-coordinate
                ax_contrast.xaxis.get_majorticklocs()[0],  # x start
                ax_raw.xaxis.get_view_interval()[1],  # x end
                linestyle='solid',
                linewidth=0.75,
                color='black')

            # effect size line
            ax_contrast.hlines(summDelta,
                               ax_contrast.xaxis.get_majorticklocs()[1],
                               ax_raw.xaxis.get_view_interval()[1],
                               linestyle='solid',
                               linewidth=0.75,
                               color='black')

            # Align the left axes and the floating axes.
            align_yaxis(ax_raw, before_stat_summary, ax_contrast, 0.)
        else:
            # Set the ticks locations for ax_raw.
            ax_raw.get_xaxis().set_ticks((0, xposAfter))

            fig.add_subplot(ax_raw)
            fig.add_subplot(ax_contrast)
        # NOTE(review): applied to both layouts; confirm it was not meant
        # for the non-floating layout only.
        ax_contrast.set_ylim(contrastYlim)

        # Calculate p-values.
        # 1-sample t-test to see if the mean of the difference is different
        # from 0.
        ttestresult = ttest_1samp(plotPoints['delta_y'], popmean=0)[1]
        bootsDelta['ttest_pval'] = ttestresult
        contrastList.append(bootsDelta)
        contrastListNames.append(str(xlevs[1]) + ' v.s. ' + str(xlevs[0]))

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    # Now we iterate thru the contrast axes to normalize all the ylims.
    # Axes alternate raw (even positions) / contrast (odd positions).
    for j, i in enumerate(range(1, len(fig.get_axes()), 2)):
        axx = fig.get_axes()[i]

        # Select the contrast by position, then read its entries by label.
        contrast_j = contrastList.iloc[:, j]
        lower = np.min(contrast_j['stat_array'])
        upper = np.max(contrast_j['stat_array'])
        meandiff = contrast_j['summary']

        # Make sure we have zero in the limits.
        if lower > 0:
            lower = 0.
        if upper < 0:
            upper = 0.

        # Get tick distance on raw axes.
        # This will be the tick distance for the contrast axes.
        rawAxesTicks = fig.get_axes()[i - 1].yaxis.get_majorticklocs()
        rawAxesTickDist = rawAxesTicks[1] - rawAxesTicks[0]

        # First re-draw of axis with new tick interval
        axx.yaxis.set_major_locator(MultipleLocator(rawAxesTickDist))
        newticks1 = axx.get_yticks()

        if floatContrast is False:
            if (showAllYAxes is False
                    and i in range(2, len(fig.get_axes()))):
                axx.get_yaxis().set_visible(showAllYAxes)
            else:
                # Major ticks that comfortably encompass lower and upper.
                newticks2 = _paired_contrast_ticks(newticks1, lower, upper,
                                                   meandiff)
                axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=axx.get_xaxis().get_view_interval()[0],
                       xmax=axx.get_xaxis().get_view_interval()[1],
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)

            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=True,
                        left=False,
                        top=True)

            # Draw back the lines for the relevant y-axes.
            drawback_y(axx)
            # Draw back the lines for the relevant x-axes.
            drawback_x(axx)

        elif floatContrast is True:
            # Major ticks that comfortably encompass lower and upper.
            newticks2 = _paired_contrast_ticks(newticks1, lower, upper,
                                               meandiff)
            # Re-draw the axis.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            # Despine and trim the axes.
            sns.despine(ax=axx,
                        trim=True,
                        bottom=False,
                        right=False,
                        left=True,
                        top=True)

    # Loop through the raw data swarmplots and despine them appropriately.
    for i in range(0, len(fig.get_axes()), 2):
        raw_axes = fig.get_axes()[i]
        if floatContrast is True:
            sns.despine(ax=raw_axes, trim=True, right=True)
        else:
            sns.despine(ax=raw_axes, trim=True, bottom=True, right=True)
            raw_axes.get_xaxis().set_visible(False)

        # Draw back the lines for the relevant y-axes.
        ymin = raw_axes.get_yaxis().get_majorticklocs()[0]
        ymax = raw_axes.get_yaxis().get_majorticklocs()[-1]
        left_edge, _ = raw_axes.get_xaxis().get_view_interval()
        raw_axes.add_artist(
            Line2D((left_edge, left_edge), (ymin, ymax),
                   color='black',
                   linewidth=1.5))

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.

    return fig, contrastList
def contrastplot(data, x=None, y=None, idx=None, idcol=None, alpha=0.75, axis_title_size=None, ci=95, contrastShareY=True, contrastEffectSizeLineStyle='solid', contrastEffectSizeLineColor='black', contrastYlim=None, contrastZeroLineStyle='solid', contrastZeroLineColor='black', connectPairs=True, effectSizeYLabel="Effect Size", figsize=None, floatContrast=True, floatSwarmSpacer=0.2, heightRatio=(1, 1), lineWidth=2, legend=True, legendFontSize=14, legendFontProps={}, paired=False, pairedDeltaLineAlpha=0.3, pairedDeltaLineWidth=1.2, pal=None, rawMarkerSize=8, rawMarkerType='o', reps=3000, showGroupCount=True, showCI=False, showAllYAxes=False, showRawData=True, smoothboot=False, statfunction=None, summaryBar=False, summaryBarColor='grey', summaryBarAlpha=0.25, summaryColour='black', summaryLine=True, summaryLineStyle='solid', summaryLineWidth=0.25, summaryMarkerSize=10, summaryMarkerType='o', swarmShareY=True, swarmYlim=None, tickAngle=45, tickAlignment='right', violinOffset=0.375, violinWidth=0.2, violinColor='k', xticksize=None, yticksize=None, **kwargs): '''Takes a pandas DataFrame and produces a contrast plot: either a Cummings hub-and-spoke plot or a Gardner-Altman contrast plot. Paired and unpaired options available. Keyword arguments: data: pandas DataFrame x: string column name containing categories to be plotted on the x-axis. y: string column name containing values to be plotted on the y-axis. idx: tuple flxible declaration of groupwise comparisons. idcol: string for paired plots. alpha: float alpha (transparency) of raw swarmed data points. 
axis_title_size=None ci=95 contrastShareY=True contrastEffectSizeLineStyle='solid' contrastEffectSizeLineColor='black' contrastYlim=None contrastZeroLineStyle='solid' contrastZeroLineColor='black' effectSizeYLabel="Effect Size" figsize=None floatContrast=True floatSwarmSpacer=0.2 heightRatio=(1,1) lineWidth=2 legend=True legendFontSize=14 legendFontProps={} paired=False pairedDeltaLineAlpha=0.3 pairedDeltaLineWidth=1.2 pal=None rawMarkerSize=8 rawMarkerType='o' reps=3000 showGroupCount=True showCI=False showAllYAxes=False showRawData=True smoothboot=False statfunction=None summaryBar=False summaryBarColor='grey' summaryBarAlpha=0.25 summaryColour='black' summaryLine=True summaryLineStyle='solid' summaryLineWidth=0.25 summaryMarkerSize=10 summaryMarkerType='o' swarmShareY=True swarmYlim=None tickAngle=45 tickAlignment='right' violinOffset=0.375 violinWidth=0.2 violinColor='k' xticksize=None yticksize=None Returns: An matplotlib Figure. Organization of figure Axes. ''' # Check that `data` is a pandas dataframe if 'DataFrame' not in str(type(data)): raise TypeError( "The object passed to the command is not not a pandas DataFrame.\ Please convert it to a pandas DataFrame.") # make sure that at least x, y, and idx are specified. if x is None and y is None and idx is None: raise ValueError( 'You need to specify `x` and `y`, or `idx`. Neither has been specifed.' ) if x is None: # if x is not specified, assume this is a 'wide' dataset, with each idx being the name of a column. datatype = 'wide' # Check that the idx are legit columns. all_idx = np.unique([element for tupl in idx for element in tupl]) # # melt the data. # data=pd.melt(data,value_vars=all_idx) # x='variable' # y='value' else: # if x is specified, assume this is a 'long' dataset with each row corresponding to one datapoint. datatype = 'long' # make sure y is not none. if y is None: raise ValueError("`paired` is false, but no y-column given.") # Calculate Ns. 
counts = data.groupby(x)[y].count() # Get and set levels of data[x] if paired is True: violinWidth = 0.1 # # Calculate Ns--which should be simply the number of rows in data. # counts=len(data) # is idcol supplied? if idcol is None and datatype == 'long': raise ValueError( '`idcol` has not been supplied but a paired plot is desired; please specify the `idcol`.' ) if idx is not None: # check if multi-plot or not if all(isinstance(element, str) for element in idx): # check that every idx is a column name. idx_not_in_cols = [n for n in idx if n not in data[x].unique()] if len(idx_not_in_cols) != 0: raise ValueError( str(idx_not_in_cols) + " cannot be found in the columns of `data`.") # data_wide_cols=[n for n in idx if n in data.columns] # if idx is supplied but not a multiplot (ie single list or tuple) if len(idx) != 2: raise ValueError(idx + " does not have length 2.") else: tuple_in = (tuple(idx, ), ) widthratio = [1] elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! idx_not_in_cols = [ n for tup in idx for n in tup if n not in data[x].unique() ] if len(idx_not_in_cols) != 0: raise ValueError( str(idx_not_in_cols) + " cannot be found in the column " + x) # data_wide_cols=[n for tup in idx for n in tup if n in data.columns] if (any(len(element) != 2 for element in idx)): # If any of the tuples does not contain exactly 2 elements. raise ValueError(element + " does not have length 2.") # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. tuple_in = idx widthratio = [] for i in tuple_in: widthratio.append(len(i)) elif idx is None: raise ValueError('Please specify idx.') showRawData = False # Just show lines, do not show data. showCI = False # wait till I figure out how to plot this for sns.barplot. if datatype == 'long': if idx is None: ## If `idx` is not specified, just take the FIRST TWO levels alphabetically. 
tuple_in = tuple(np.sort(np.unique(data[x]))[0:2], ) # pivot the dataframe if it is long! data_pivot = data.pivot_table(index=idcol, columns=x, values=y) elif paired is False: if idx is None: widthratio = [1] tuple_in = (tuple(data[x].unique()), ) if len(tuple_in[0]) > 2: floatContrast = False else: if all(isinstance(element, str) for element in idx): # if idx is supplied but not a multiplot (ie single list or tuple) # check all every idx specified can be found in data[x] idx_not_in_x = [n for n in idx if n not in data[x].unique()] if len(idx_not_in_x) != 0: raise ValueError( str(idx_not_in_x) + " cannot be found in the column " + x) tuple_in = (idx, ) widthratio = [1] if len(idx) > 2: floatContrast = False elif all(isinstance(element, tuple) for element in idx): # if idx is supplied, and it is a list/tuple of tuples or lists, we have a multiplot! idx_not_in_x = [ n for tup in idx for n in tup if n not in data[x].unique() ] if len(idx_not_in_x) != 0: raise ValueError( str(idx_not_in_x) + " cannot be found in the column " + x) tuple_in = idx if (any(len(element) > 2 for element in tuple_in)): # if any of the tuples in idx has more than 2 groups, we turn set floatContrast as False. floatContrast = False # Make sure the widthratio of the seperate multiplot corresponds to how # many groups there are in each one. widthratio = [] for i in tuple_in: widthratio.append(len(i)) else: raise TypeError( "The object passed to `idx` consists of a mixture of single strings and tuples. \ Please make sure that `idx` is either a tuple of column names, or a tuple of tuples, for plotting." ) # Ensure summaryLine and summaryBar are not displayed together. if summaryLine is True and summaryBar is True: summaryBar = True summaryLine = False # Turn off summary line if floatContrast is true if floatContrast: summaryLine = False # initialise statfunction if statfunction == None: statfunction = np.mean # Create list to collect all the contrast DataFrames generated. 
contrastList = list() contrastListNames = list() # Setting color palette for plotting. if pal is None: if 'hue' in kwargs: colorCol = kwargs['hue'] if colorCol not in data.columns: raise ValueError(colorCol + ' is not a column name.') colGrps = data[colorCol].unique() #.tolist() plotPal = dict( zip(colGrps, sns.color_palette(n_colors=len(colGrps)))) else: if datatype == 'long': colGrps = data[x].unique() #.tolist() plotPal = dict( zip(colGrps, sns.color_palette(n_colors=len(colGrps)))) if datatype == 'wide': plotPal = np.repeat('k', len(data)) else: if datatype == 'long': plotPal = pal if datatype == 'wide': plotPal = list(map(lambda x: pal[x], data[hue])) if swarmYlim is None: # get range of _selected groups_. # u = list() # for t in tuple_in: # for i in np.unique(t): # u.append(i) # u = np.unique(u) u = np.unique([element for tupl in tuple_in for element in tupl]) if datatype == 'long': tempdat = data[data[x].isin(u)] swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])]) if datatype == 'wide': allMin = list() allMax = list() for col in u: allMin.append(np.min(data[col])) allMax.append(np.max(data[col])) swarm_ylim = np.array([np.min(allMin), np.max(allMax)]) swarm_ylim = np.round(swarm_ylim) else: swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]]) if summaryBar is True: lims = swarm_ylim # check that 0 lies within the desired limits. # if not, extend (upper or lower) limit to zero. if 0 not in range(int(round(lims[0])), int(round( lims[1]))): # turn swarm_ylim to integer range. # check if all negative:. if lims[0] < 0. and lims[1] < 0.: swarm_ylim = np.array([np.min(lims), 0.]) # check if all positive. elif lims[0] > 0. 
and lims[1] > 0.: swarm_ylim = np.array([0., np.max(lims)]) if contrastYlim is not None: contrastYlim = np.array([contrastYlim[0], contrastYlim[1]]) # plot params if axis_title_size is None: axis_title_size = 27 if yticksize is None: yticksize = 22 if xticksize is None: xticksize = 22 # Set clean style sns.set(style='ticks') axisTitleParams = {'labelsize': axis_title_size} xtickParams = {'labelsize': xticksize} ytickParams = {'labelsize': yticksize} svgParams = {'fonttype': 'none'} rc('axes', **axisTitleParams) rc('xtick', **xtickParams) rc('ytick', **ytickParams) rc('svg', **svgParams) if figsize is None: if len(tuple_in) > 2: figsize = (12, (12 / np.sqrt(2))) else: figsize = (8, (8 / np.sqrt(2))) # calculate CI. if ci < 0 or ci > 100: raise ValueError('ci should be between 0 and 100.') alpha_level = (100. - ci) / 100. # Initialise figure, taking into account desired figsize. fig = plt.figure(figsize=figsize) # Initialise GridSpec based on `tuple_in` shape. gsMain = gridspec.GridSpec( 1, np.shape(tuple_in)[0], # 1 row; columns based on number of tuples in tuple. width_ratios=widthratio, wspace=0) for gsIdx, current_tuple in enumerate(tuple_in): #### FOR EACH TUPLE IN IDX if datatype == 'long': plotdat = data[data[x].isin(current_tuple)] plotdat[x] = plotdat[x].astype("category") plotdat[x].cat.set_categories(current_tuple, ordered=True, inplace=True) plotdat.sort_values(by=[x]) # # Drop all nans. # plotdat.dropna(inplace=True) summaries = plotdat.groupby(x)[y].apply(statfunction) if datatype == 'wide': plotdat = data[list(current_tuple)] summaries = statfunction(plotdat) plotdat = pd.melt(plotdat) ##### NOW I HAVE MELTED THE WIDE DATA. if floatContrast is True: # Use fig.add_subplot instead of plt.Subplot. ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False) ax_contrast = ax_raw.twinx() else: # Create subGridSpec with 2 rows and 1 column. 
subGridSpec = gridspec.GridSpecFromSubplotSpec( 2, 1, subplot_spec=gsMain[gsIdx], wspace=0) # Use plt.Subplot instead of fig.add_subplot ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False) ax_contrast = plt.Subplot(fig, subGridSpec[1, 0], sharex=ax_raw, frame_on=False) # Calculate the boostrapped contrast bscontrast = list() if paired is False: tempplotdat = plotdat[[ x, y ]] # only select the columns used for x and y plotting. for i in range(1, len(current_tuple)): # Note that you start from one. No need to do auto-contrast! # if datatype=='long':aas tempbs = bootstrap_contrast( data=tempplotdat.dropna(), x=x, y=y, idx=[current_tuple[0], current_tuple[i]], statfunction=statfunction, smoothboot=smoothboot, alpha_level=alpha_level, reps=reps) bscontrast.append(tempbs) contrastList.append(tempbs) contrastListNames.append(current_tuple[i] + ' vs. ' + current_tuple[0]) #### PLOT RAW DATA. ax_raw.set_ylim(swarm_ylim) # ax_raw.yaxis.set_major_locator(MaxNLocator(n_bins='auto')) # ax_raw.yaxis.set_major_locator(LinearLocator()) if paired is False and showRawData is True: # Seaborn swarmplot doc says to set custom ylims first. sw = sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if floatContrast: # Get horizontal offset values. maxXBefore = max(sw.collections[0].get_offsets().T[0]) minXAfter = min(sw.collections[1].get_offsets().T[0]) xposAfter = maxXBefore + floatSwarmSpacer xAfterShift = minXAfter - xposAfter # shift the (second) swarmplot offsetSwarmX(sw.collections[1], -xAfterShift) # shift the tick. ax_raw.set_xticks([0., 1 - xAfterShift]) elif paired is True: if showRawData is True: sw = sns.swarmplot(data=plotdat, x=x, y=y, order=current_tuple, ax=ax_raw, alpha=alpha, palette=plotPal, size=rawMarkerSize, marker=rawMarkerType, **kwargs) if connectPairs is True: # Produce paired plot with lines. 
before = plotdat[plotdat[x] == current_tuple[0]][y].tolist() after = plotdat[plotdat[x] == current_tuple[1]][y].tolist() linedf = pd.DataFrame({'before': before, 'after': after}) # to get color, need to loop thru each line and plot individually. for ii in range(0, len(linedf)): ax_raw.plot( [0, 0.25], [linedf.loc[ii, 'before'], linedf.loc[ii, 'after']], linestyle='solid', linewidth=pairedDeltaLineWidth, color=plotPal[current_tuple[0]], alpha=pairedDeltaLineAlpha, ) ax_raw.set_xlim(-0.25, 0.5) ax_raw.set_xticks([0, 0.25]) ax_raw.set_xticklabels([current_tuple[0], current_tuple[1]]) # if swarmYlim is None: # # if swarmYlim was not specified, tweak the y-axis # # to show all the data without losing ticks and range. # ## Get all yticks. # axxYTicks=ax_raw.yaxis.get_majorticklocs() # ## Get ytick interval. # YTickInterval=axxYTicks[1]-axxYTicks[0] # ## Get current ylim # currentYlim=ax_raw.get_ylim() # ## Extend ylim by adding a fifth of the tick interval as spacing at both ends. # ax_raw.set_ylim( # currentYlim[0]-(YTickInterval/5), # currentYlim[1]+(YTickInterval/5) # ) # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto')) # ax_raw.yaxis.set_major_locator(MaxNLocator(nbins='auto')) # ax_raw.yaxis.set_major_locator(LinearLocator()) if summaryBar is True: if paired is False: bar_raw = sns.barplot(x=summaries.index.tolist(), y=summaries.values, facecolor=summaryBarColor, ax=ax_raw, alpha=summaryBarAlpha) if floatContrast is True: maxSwarmSpan = 2 / 10. xlocs = list() for i, bar in enumerate(bar_raw.patches): x_width = bar.get_x() width = bar.get_width() centre = x_width + (width / 2.) if i == 0: bar.set_x(centre - maxSwarmSpan / 2.) xlocs.append(centre) else: bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.) xlocs.append(centre - xAfterShift) bar.set_width(maxSwarmSpan) ax_raw.set_xticks( xlocs) # make sure xticklocs match the barplot. elif floatContrast is False: maxSwarmSpan = 4 / 10. 
xpos = ax_raw.xaxis.get_majorticklocs() for i, bar in enumerate(bar_raw.patches): bar.set_x(xpos[i] - maxSwarmSpan / 2.) bar.set_width(maxSwarmSpan) else: # if paired is true ax_raw.bar([0, 0.25], [ statfunction(plotdat[current_tuple[0]]), statfunction(plotdat[current_tuple[1]]) ], color=summaryBarColor, alpha=0.5, width=0.05) ## Draw zero reference line. ax_raw.add_artist( Line2D((ax_raw.xaxis.get_view_interval()[0], ax_raw.xaxis.get_view_interval()[1]), (0, 0), color='k', linewidth=1.25)) if summaryLine is True: if paired is True: xdelta = 0 else: xdelta = summaryLineWidth for i, m in enumerate(summaries): ax_raw.plot( (i - xdelta, i + xdelta), # x-coordinates (m, m), color=summaryColour, linestyle=summaryLineStyle) if showCI is True: sns.barplot(data=plotdat, x=x, y=y, ax=ax_raw, alpha=0, ci=95) ax_raw.set_xlabel("") if floatContrast is False: fig.add_subplot(ax_raw) #### PLOT CONTRAST DATA. if len(current_tuple) == 2: if paired is False: # Plot the CIs on the contrast axes. plotbootstrap(sw.collections[1], bslist=tempbs, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, offset=floatContrast, color=violinColor, linewidth=1) else: bootsDelta = bootstrap(plotdat[current_tuple[1]] - plotdat[current_tuple[0]], statfunction=statfunction, smoothboot=smoothboot, alpha_level=alpha_level, reps=reps) contrastList.append(bootsDelta) contrastListNames.append(current_tuple[1] + ' vs. ' + current_tuple[0]) summDelta = bootsDelta['summary'] lowDelta = bootsDelta['bca_ci_low'] highDelta = bootsDelta['bca_ci_high'] if floatContrast: xpos = 0.375 else: xpos = 0.25 # Plot the summary measure. ax_contrast.plot(xpos, bootsDelta['summary'], marker=summaryMarkerType, markerfacecolor='k', markersize=summaryMarkerSize, alpha=0.75) # Plot the CI. ax_contrast.plot( [xpos, xpos], [lowDelta, highDelta], color='k', alpha=0.75, # linewidth=1, linestyle='solid') # Plot the violin-plot. 
v = ax_contrast.violinplot(bootsDelta['stat_array'], [xpos], widths=violinWidth, showextrema=False, showmeans=False) halfviolin(v, half='right', color='k') if floatContrast: # Set reference lines if paired is False: ## First get leftmost limit of left reference group xtemp, _ = np.array(sw.collections[0].get_offsets()).T leftxlim = xtemp.min() ## Then get leftmost limit of right test group xtemp, _ = np.array(sw.collections[1].get_offsets()).T rightxlim = xtemp.min() ref = tempbs['summary'] else: leftxlim = 0 rightxlim = 0.25 ref = bootsDelta['summary'] ax_contrast.set_xlim(-0.25, 0.5) # does this work? ## zero line ax_contrast.hlines( 0, # y-coordinates leftxlim, 3.5, # x-coordinates, start and end. linestyle=contrastZeroLineStyle, linewidth=1, color=contrastZeroLineColor) ## effect size line ax_contrast.hlines( ref, rightxlim, 3.5, # x-coordinates, start and end. linestyle=contrastEffectSizeLineStyle, linewidth=1, color=contrastEffectSizeLineColor) if paired is False: es = float(tempbs['summary']) refSum = tempbs['statistic_ref'] else: es = float(bootsDelta['summary']) refSum = statfunction(plotdat[current_tuple[0]]) ## If the effect size is positive, shift the right axis up. if es > 0: rightmin = ax_raw.get_ylim()[0] - es rightmax = ax_raw.get_ylim()[1] - es ## If the effect size is negative, shift the right axis down. elif es < 0: rightmin = ax_raw.get_ylim()[0] + es rightmax = ax_raw.get_ylim()[1] + es ax_contrast.set_ylim(rightmin, rightmax) if gsIdx > 0: ax_contrast.set_ylabel('') align_yaxis(ax_raw, refSum, ax_contrast, 0.) else: # Set bottom axes ybounds if contrastYlim is not None: ax_contrast.set_ylim(contrastYlim) if paired is False: # Set xlims so everything is properly visible! swarm_xbounds = ax_raw.get_xbound() ax_contrast.set_xbound( swarm_xbounds[0] - (summaryLineWidth * 1.1), swarm_xbounds[1] + (summaryLineWidth * 1.1)) else: ax_contrast.set_xlim(-0.05, 0.25 + violinWidth) else: # Plot the CIs on the bottom axes. 
plotbootstrap_hubspoke(bslist=bscontrast, ax=ax_contrast, violinWidth=violinWidth, violinOffset=violinOffset, markersize=summaryMarkerSize, marker=summaryMarkerType, linewidth=lineWidth) if floatContrast is False: fig.add_subplot(ax_contrast) if gsIdx > 0: ax_raw.set_ylabel('') ax_contrast.set_ylabel('') # Turn contrastList into a pandas DataFrame, contrastList = pd.DataFrame(contrastList).T contrastList.columns = contrastListNames # Get number of axes in figure for aesthetic tweaks. axesCount = len(fig.get_axes()) for i in range(0, axesCount, 2): # Set new tick labels. # The tick labels belong to the SWARM axes # for both floating and non-floating plots. # This is because `sharex` was invoked. axx = fig.axes[i] newticklabs = list() for xticklab in axx.xaxis.get_ticklabels(): t = xticklab.get_text() if paired: N = str(counts) else: N = str(counts.ix[t]) if showGroupCount: newticklabs.append(t + ' n=' + N) else: newticklabs.append(t) axx.set_xticklabels(newticklabs, rotation=tickAngle, horizontalalignment=tickAlignment) ## Loop thru SWARM axes for aesthetic touchups. for i in range(0, axesCount, 2): axx = fig.axes[i] if floatContrast is False: axx.xaxis.set_visible(False) sns.despine(ax=axx, trim=True, bottom=False, left=False) else: sns.despine(ax=axx, trim=True, bottom=True, left=True) if i == 0: drawback_y(axx) if i != axesCount - 2 and 'hue' in kwargs: # If this is not the final swarmplot, remove the hue legend. axx.legend().set_visible(False) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes. # Not entirely sure why I have to do this. drawback_y(axx) else: drawback_y(axx) # Add zero reference line for swarmplots with bars. 
if summaryBar is True: axx.add_artist( Line2D((axx.xaxis.get_view_interval()[0], axx.xaxis.get_view_interval()[1]), (0, 0), color='black', linewidth=0.75)) if legend is False: axx.legend().set_visible(False) else: if i == axesCount - 2: # the last (rightmost) swarm axes. axx.legend(loc='top right', bbox_to_anchor=(1.1, 1.0), fontsize=legendFontSize, **legendFontProps) ## Loop thru the CONTRAST axes and perform aesthetic touch-ups. ## Get the y-limits: for j, i in enumerate(range(1, axesCount, 2)): axx = fig.get_axes()[i] if floatContrast is False: xleft, xright = axx.xaxis.get_view_interval() # Draw zero reference line. axx.hlines(y=0, xmin=xleft - 1, xmax=xright + 1, linestyle=contrastZeroLineStyle, linewidth=0.75, color=contrastZeroLineColor) # reset view interval. axx.set_xlim(xleft, xright) if showAllYAxes is False: if i in range(2, axesCount): axx.yaxis.set_visible(False) else: # Draw back the lines for the relevant y-axes, only is axesCount is 2. # Not entirely sure why I have to do this. if axesCount == 2: drawback_y(axx) sns.despine(ax=axx, top=True, right=True, left=False, bottom=False, trim=True) if j == 0 and axesCount == 2: # Draw back x-axis lines connecting ticks. drawback_x(axx) # Rotate tick labels. rotateTicks(axx, tickAngle, tickAlignment) elif floatContrast is True: if paired is True: # Get the bootstrapped contrast range. lower = np.min(contrastList.ix['stat_array', j]) upper = np.max(contrastList.ix['stat_array', j]) else: lower = np.min(contrastList.ix['diffarray', j]) upper = np.max(contrastList.ix['diffarray', j]) meandiff = contrastList.ix['summary', j] ## Make sure we have zero in the limits. if lower > 0: lower = 0. if upper < 0: upper = 0. ## Get the tick interval from the left y-axis. 
leftticks = fig.get_axes()[i - 1].get_yticks() tickstep = leftticks[1] - leftticks[0] ## First re-draw of axis with new tick interval axx.yaxis.set_major_locator(MultipleLocator(base=tickstep)) newticks1 = axx.get_yticks() ## Obtain major ticks that comfortably encompass lower and upper. newticks2 = list() for a, b in enumerate(newticks1): if (b >= lower and b <= upper): # if the tick lies within upper and lower, take it. newticks2.append(b) # if the meandiff falls outside of the newticks2 set, add a tick in the right direction. if np.max(newticks2) < meandiff: ind = np.where(newticks1 == np.max(newticks2))[0][ 0] # find out the max tick index in newticks1. newticks2.append(newticks1[ind + 1]) elif meandiff < np.min(newticks2): ind = np.where(newticks1 == np.min(newticks2))[0][ 0] # find out the min tick index in newticks1. newticks2.append(newticks1[ind - 1]) newticks2 = np.array(newticks2) newticks2.sort() ## Second re-draw of axis to shrink it to desired limits. axx.yaxis.set_major_locator(FixedLocator(locs=newticks2)) ## Despine the axes. sns.despine(ax=axx, trim=True, bottom=False, right=False, left=True, top=True) # Normalize bottom/right Contrast axes to each other for Cummings hub-and-spoke plots. if (axesCount > 2 and contrastShareY is True and floatContrast is False): # Set contrast ylim as max ticks of leftmost swarm axes. if contrastYlim is None: lower = list() upper = list() for c in range(0, len(contrastList.columns)): lower.append(np.min(contrastList.ix['bca_ci_low', c])) upper.append(np.max(contrastList.ix['bca_ci_high', c])) lower = np.min(lower) upper = np.max(upper) else: lower = contrastYlim[0] upper = contrastYlim[1] normalizeContrastY(fig, contrast_ylim=contrastYlim, show_all_yaxes=showAllYAxes) # Zero gaps between plots on the same row, if floatContrast is False if (floatContrast is False and showAllYAxes is False): gsMain.update(wspace=0.) else: # Tight Layout! gsMain.tight_layout(fig) # And we're all done. 
rcdefaults() # restore matplotlib defaults. sns.set() # restore seaborn defaults. return fig, contrastList
# Finish the train/validation accuracy figure and write it to disk.
plt.ylim([0.4, 1.0])
plt.xticks(np.arange(1, 10.5, step=1))
# A bare `plt.grid` only references the function and draws nothing; pass
# True explicitly because a no-argument call would *toggle* the grid.
plt.grid(True)
plt.savefig('FIGURES/accTrainVal_10foldCV_4classes_old.pdf')

# Row-normalise every confusion matrix, then average them.
# `keepdims=True` keeps the row sums as a column vector so that row i is
# divided by the total of row i; a flat `sum(axis=1)` would broadcast along
# the columns and divide column j by the total of row j instead.
normalizedAvgCM = np.zeros((numClasses, numClasses))
for cm in confusionMatrices:
    cm = np.asarray(cm, dtype=float)
    normalizedAvgCM += cm / cm.sum(axis=1, keepdims=True)
normalizedAvgCM = normalizedAvgCM / nfold

# Plot the averaged, normalised confusion matrix as an annotated heat map.
df_cm = pd.DataFrame(normalizedAvgCM, index=classNames, columns=classNames)
plt.figure(figsize=(9.6, 4.1))  # 5.7
sns.set(font_scale=1.4)  # for label size
ax = sns.heatmap(
    df_cm,
    cbar_kws={'ticks': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]},
    vmin=0,
    vmax=1.0,
    annot=True,
    annot_kws={"size": 18},
    fmt='2.2f',
    cmap="Blues")  # font size
# Pin the y-limits to the reversed x-limits so the first and last rows are
# drawn at full height. (An earlier +/-0.5 adjustment of the y-limits was
# immediately overwritten by this call and has been dropped.)
ax.set_ylim(sorted(ax.get_xlim(), reverse=True))
ax.set_yticklabels(classNames, rotation=0, fontsize="16", va="center")
ax.set_xticklabels(classNames, rotation=0, fontsize="16", ha="center")
plt.tight_layout()
def perform_edge_imputation():
    """Measure how well graph types are recovered after edge removal and imputation.

    For every removal probability ``p`` in 0.0, 0.1, ..., 0.9 and for each of
    the five random-graph generators, ``samples`` graphs are generated, passed
    through ``remove_edges`` and ``impute_edge_algorithm``, and classified
    with ``predict_structure``.  The predicted type is the arg-min of the
    returned ``cluster`` scores.

    Per ``p`` the function saves a confusion-matrix heat map (``CM_*.png``)
    and an accuracy-at-k curve (``PA_*.png``); at the end it saves the
    accuracy-at-1 curve over all ``p``.  Output paths are hard-coded.

    NOTE(review): ``actual == predicted`` compares a position in ``rgs`` with
    a position in ``cluster``; this presumably relies on ``predict_structure``
    reporting types in the same order as ``index`` -- confirm.

    Returns:
        None.  Results are printed and written to PNG files.
    """
    accuracy_at_removed = []
    remove_probability = [0.1 * i for i in range(10)]
    constraints = {'edge_count': (1000, 1100)}
    samples = 2
    index = [
        'Watts Strogatz', 'Geometric', 'Erdos Renyi', 'Barabasi Albert',
        'Planted Partition Model'
    ]
    small_index = ['WS', 'Geo', 'ER', 'BA', 'PPM']
    constraints_enforced = False
    rgs = [
        structural_identities.watts_strogatz_generator,
        structural_identities.geometric_generator,
        structural_identities.erdos_renyi_generator,
        structural_identities.barabasi_albert_generator,
        structural_identities.planted_partition_generator
    ]
    picture_dir = '/Users/Brennan/Desktop/Networks/networks-project/pictures/'

    for p in remove_probability:
        correct = 0.0
        accuracy_at_k = [0] * 5
        confusion_matrix = [[0 for _ in range(5)] for _ in range(5)]
        # Same text as before (including float noise such as
        # '30.000000000000004') so titles and file names do not change.
        percent = str((p) * 100)

        for actual, rg in enumerate(rgs):
            title = index[actual]
            for _ in range(samples):
                G = structural_identities.constrained_generation(
                    rg, constraints)
                new_G = deepcopy(G)
                new_G = remove_edges(new_G, p)
                new_G = impute_edge_algorithm(new_G, G)
                cluster, types = predict_structure(new_G, 1,
                                                   constraints_enforced)
                predicted = cluster.index(min(cluster))
                # Same output as the Python 2 `print a, b` statement.
                print('%s %s' % (title, types[predicted]))
                if actual == predicted:
                    correct += 1
                confusion_matrix[actual][predicted] += 1

                # ranks[t] is the position of type t when the scores are
                # sorted ascending (0 = best match).
                ranks = np.array(cluster).argsort().argsort().tolist()
                true_rank = None
                for rank in range(len(cluster)):  # 5 types of rg
                    if title == types[ranks.index(rank)]:
                        true_rank = rank
                        break
                # A true label ranked r is a hit for every k >= r.  If the
                # label is missing from `types` it is a miss at every k; the
                # old sentinel of -1 let the countdown reach index -1 and
                # increment the last bucket twice.
                if true_rank is not None:
                    for j in range(true_rank, len(cluster)):
                        accuracy_at_k[j] += 1

        # Confusion-matrix heat map for this removal probability.
        plt.figure(10)
        sns.set()
        ax = plt.axes()
        sns.heatmap(confusion_matrix,
                    ax=ax,
                    cmap="YlGnBu",
                    yticklabels=index,
                    xticklabels=small_index)
        ax.set_title('Confusion Matrix for Edge Imputed Graphs (' + percent +
                     ' percent removed)')
        plt.tight_layout()
        plt.savefig(picture_dir + 'CM_' + percent + '_removed.png')
        plt.close()

        # Undo the seaborn styling before drawing the plain line plot.
        sns.reset_defaults()
        imp.reload(mpl)
        imp.reload(plt)
        imp.reload(sns)

        # Convert hit counts into fractions of all classified graphs.
        total_graphs = samples * 1.0 * len(rgs)
        accuracy_at_k = [hits / total_graphs for hits in accuracy_at_k]

        ks = list(range(1, 6))
        if constraints_enforced:
            plt.plot(ks, accuracy_at_k, marker='o', color='red')
        else:
            plt.plot(ks, accuracy_at_k, marker='o')
        plt.xlabel('k (top k labels)')
        plt.ylim((0, 1.1))
        plt.ylabel('Accuracy @ k')
        plt.title('Prediction Accuracy for Edge Imputed Graphs (' + percent +
                  ' percent removed)')
        # Lay out before saving; it used to run after savefig and therefore
        # had no effect on the written file.
        plt.tight_layout()
        plt.savefig(picture_dir + 'PA_' + percent + '_removed.png')
        plt.close()

        accuracy_at_removed.append(correct / (len(rgs) * samples))

    # Accuracy at k=1 as a function of the removal probability.
    plt.plot(remove_probability, accuracy_at_removed, marker='o')
    plt.xlabel('Percent of Edges Removed')
    plt.ylim((0, 1.1))
    plt.ylabel('Accuracy @ 1')
    plt.title('Prediction Accuracy for Graph Recovery (Edge Imputation)')
    plt.savefig(
        '/Users/Brennan/Desktop/Networks/networks-project/pictures/graph_imputation_forall_p.png'
    )
    plt.clf()
'''
Written by Sara Camnasio
[email protected]

Script header: imports the plotting stack and loads Final_sample.csv twice,
once as floats and once as strings.
'''
import numpy as np
import time
from matplotlib import pyplot as plt
import matplotlib.lines as mlines
# NOTE(review): `sns` is bound here and then rebound by the plain
# `import seaborn as sns` two lines below, so only the second binding is
# used afterwards.
import seaborn.apionly as sns
import pandas as pd
import seaborn as sns
from matplotlib.font_manager import FontProperties

sns.set(color_codes=True)

# Importing results from table
# Numeric view of the table: the header row is skipped and every cell is
# parsed as float.
source = np.genfromtxt(
    '/Users/saracamnasio/Dropbox/Research/Projects/UnusuallyRB/2016_Analysis/input/Final_sample.csv',
    delimiter=',',
    skip_header=1,
    dtype=float)
# String view of the same file, used to read text columns.
source1 = np.genfromtxt(
    '/Users/saracamnasio/Dropbox/Research/Projects/UnusuallyRB/2016_Analysis/input/Final_sample.csv',
    delimiter=',',
    skip_header=1,
    dtype=str)

# Naming values for easier plotting adjustments:
names = source1[:, 0]  # first column of the string table
IP = source[:, 18]  # column 18 of the numeric table
def contrastplot_test(
        data, x, y, idx=None,
        alpha=0.75,
        axis_title_size=None,
        barWidth=5,
        contrastShareY=True,
        contrastEffectSizeLineStyle='solid',
        contrastEffectSizeLineColor='black',
        contrastYlim=None,
        contrastZeroLineStyle='solid',
        contrastZeroLineColor='black',
        effectSizeYLabel="Effect Size",
        figsize=None,
        floatContrast=True,
        floatSwarmSpacer=0.2,
        heightRatio=(1, 1),
        idcol=None,
        lineWidth=2,
        legend=True,
        legendFontSize=14,
        legendFontProps={},
        paired=False,
        pal=None,
        rawMarkerSize=8,
        rawMarkerType='o',
        reps=3000,
        showGroupCount=True,
        show95CI=False,
        showAllYAxes=False,
        showRawData=True,
        smoothboot=False,
        statfunction=None,
        summaryBar=False,
        summaryBarColor='grey',
        summaryBarAlpha=0.25,
        summaryColour='black',
        summaryLine=True,
        summaryLineStyle='solid',
        summaryLineWidth=0.25,
        summaryMarkerSize=10,
        summaryMarkerType='o',
        swarmShareY=True,
        swarmYlim=None,
        tickAngle=45,
        tickAlignment='right',
        violinOffset=0.375,
        violinWidth=0.2,
        violinColor='k',
        xticksize=None,
        yticksize=None,
        **kwargs):
    '''Take a pandas DataFrame and produce a contrast plot: either a Cummings
    hub-and-spoke plot or a Gardner-Altman contrast plot.

    `data` is long-format: `x` names the grouping column and `y` the value
    column.  `idx` selects the groups to plot: None (all groups in sorted
    order, or the first two when `paired` is true), a tuple of group names
    (one panel), or a tuple of tuples (one panel per inner tuple).  Within a
    panel every group is contrasted against the panel's first group via
    `bootstrap_contrast`.  A floating (side-by-side) contrast axis is only
    used when every panel has exactly two groups; otherwise `floatContrast`
    is switched off and the contrasts are drawn on a second row.

    The remaining keyword arguments control styling and are used as named in
    the body.  `effectSizeYLabel`, `heightRatio`, `idcol` and `swarmShareY`
    are accepted but not used by this function.  Extra `**kwargs` are
    forwarded to `seaborn.swarmplot`.

    NOTE(review): the swarm collections are needed to place the contrast for
    two-group panels, so `showRawData=False` is not usable with such panels.

    Side effects: sets seaborn/matplotlib rc parameters while plotting and
    restores the matplotlib and seaborn defaults before returning.

    Returns:
        (fig, contrastList): the matplotlib Figure and a DataFrame with one
        column per contrast, named "<group> vs. <reference>".

    Raises:
        TypeError: if `data` is not a pandas DataFrame, or if `idx` mixes
            strings and tuples.
    '''
    # Check that `data` is a pandas dataframe
    if not isinstance(data, pd.DataFrame):
        raise TypeError(
            "The object passed to the command is not a pandas DataFrame. "
            "Please convert it to a pandas DataFrame.")

    # Get and set levels of data[x]
    if idx is None:
        widthratio = [1]
        allgrps = np.sort(data[x].unique())
        if paired:
            # If `idx` is not specified, just take the FIRST TWO levels
            # alphabetically.  They must be wrapped in an outer tuple like
            # every other case, because the code below iterates over
            # *tuples of groups*.
            tuple_in = (tuple(allgrps[0:2]), )
        else:
            # No idx is given, so all groups are compared to the first one
            # in the DataFrame column.
            tuple_in = (tuple(allgrps), )
        if len(tuple_in[0]) > 2:
            floatContrast = False
    else:
        if all(isinstance(element, str) for element in idx):
            # if idx is supplied but not a multiplot (ie single list or tuple)
            tuple_in = (idx, )
            widthratio = [1]
            if len(idx) > 2:
                floatContrast = False
        elif all(isinstance(element, tuple) for element in idx):
            # if idx is supplied, and it is a list/tuple of tuples, we have
            # a multiplot!
            tuple_in = idx
            if any(len(element) > 2 for element in tuple_in):
                # if any of the tuples in idx has more than 2 groups, we
                # set floatContrast as False.
                floatContrast = False
            # Make sure the widthratio of the separate multiplot corresponds
            # to how many groups there are in each one.
            widthratio = [len(element) for element in tuple_in]
        else:
            raise TypeError(
                "The object passed to `idx` consists of a mixture of single "
                "strings and tuples. Please make sure that `idx` is either "
                "a tuple of column names, or a tuple of tuples for plotting.")

    # initialise statfunction
    if statfunction is None:
        statfunction = np.mean

    # Create list to collect all the contrast DataFrames generated.
    contrastList = list()
    contrastListNames = list()

    # Setting color palette for plotting.
    if pal is None:
        if 'hue' in kwargs:
            colGrps = data[kwargs['hue']].unique()
        else:
            colGrps = data[x].unique()
        # One colour per group.  Sizing the palette by the number of
        # *selected* groups could leave a plotted group without a colour.
        plotPal = dict(zip(colGrps,
                           sns.color_palette(n_colors=len(colGrps))))
    else:
        plotPal = pal

    # Ensure summaryLine and summaryBar are not displayed together.
    if summaryLine is True and summaryBar is True:
        summaryBar = True
        summaryLine = False
    # Turn off summary line if floatContrast is true
    if floatContrast:
        summaryLine = False

    if swarmYlim is None:
        # get range of _selected groups_.  Iterate `tuple_in`, not `idx`:
        # `idx` is None by default and cannot be iterated.
        u = np.unique([element for tupl in tuple_in for element in tupl])
        tempdat = data[data[x].isin(u)]
        swarm_ylim = np.array([np.min(tempdat[y]), np.max(tempdat[y])])
    else:
        swarm_ylim = np.array([swarmYlim[0], swarmYlim[1]])

    if contrastYlim is not None:
        contrastYlim = np.array([contrastYlim[0], contrastYlim[1]])

    # Not sure why have to reduce the barwidth by this much!
    # Float divisor so that Python 2 does not truncate the result to 0.
    barWidth = barWidth / 1000.
    if showRawData is True:
        maxSwarmSpan = 0.25
    else:
        maxSwarmSpan = barWidth

    # Expand the ylim in both directions by 10% of its range.
    pad = 0.1 * (swarm_ylim[1] - swarm_ylim[0])
    swarm_ylim = np.array([swarm_ylim[0] - pad, swarm_ylim[1] + pad])

    # plot params
    if axis_title_size is None:
        axis_title_size = 25
    if yticksize is None:
        yticksize = 18
    if xticksize is None:
        xticksize = 18

    # Set clean style
    sns.set(style='ticks')

    axisTitleParams = {'labelsize': axis_title_size}
    xtickParams = {'labelsize': xticksize}
    ytickParams = {'labelsize': yticksize}
    svgParams = {'fonttype': 'none'}

    rc('axes', **axisTitleParams)
    rc('xtick', **xtickParams)
    rc('ytick', **ytickParams)
    rc('svg', **svgParams)

    if figsize is None:
        if len(tuple_in) > 2:
            figsize = (12, (12 / np.sqrt(2)))
        else:
            figsize = (8, (8 / np.sqrt(2)))

    # Initialise figure, taking into account desired figsize.
    fig = plt.figure(figsize=figsize)

    # Initialise GridSpec based on `tuple_in` shape.
    gsMain = gridspec.GridSpec(
        1, len(tuple_in),  # 1 row; one column per tuple in `tuple_in`.
        width_ratios=widthratio,
        wspace=0)

    for gsIdx, current_tuple in enumerate(tuple_in):
        #### FOR EACH TUPLE IN IDX
        # Work on a copy so the caller's frame is never modified and pandas
        # does not warn about assigning into a slice.
        plotdat = data[data[x].isin(current_tuple)].copy()
        # Ordered categorical so that group order follows `current_tuple`.
        plotdat[x] = pd.Categorical(plotdat[x],
                                    categories=list(current_tuple),
                                    ordered=True)
        # Drop all nans.
        plotdat = plotdat.dropna()

        # Calculate summaries.
        summaries = plotdat.groupby([x], sort=True)[y].apply(statfunction)

        if floatContrast is True:
            # Use fig.add_subplot instead of plt.Subplot
            ax_raw = fig.add_subplot(gsMain[gsIdx], frame_on=False)
            ax_contrast = ax_raw.twinx()
        else:
            # Create subGridSpec with 2 rows and 1 column.
            subGridSpec = gridspec.GridSpecFromSubplotSpec(
                2, 1, subplot_spec=gsMain[gsIdx], wspace=0)
            # Use plt.Subplot instead of fig.add_subplot
            ax_raw = plt.Subplot(fig, subGridSpec[0, 0], frame_on=False)
            ax_contrast = plt.Subplot(fig, subGridSpec[1, 0],
                                      sharex=ax_raw, frame_on=False)

        # Calculate the bootstrapped contrast
        bscontrast = list()
        for i in range(1, len(current_tuple)):
            # Note that you start from one. No need to do auto-contrast!
            tempbs = bootstrap_contrast(
                data=data,
                x=x,
                y=y,
                idx=[current_tuple[0], current_tuple[i]],
                statfunction=statfunction,
                smoothboot=smoothboot,
                reps=reps)
            bscontrast.append(tempbs)
            contrastList.append(tempbs)
            contrastListNames.append(
                current_tuple[i] + ' vs. ' + current_tuple[0])

        #### PLOT RAW DATA.
        if showRawData is True:
            # Seaborn swarmplot doc says to set custom ylims first.
            ax_raw.set_ylim(swarm_ylim)
            sw = sns.swarmplot(
                data=plotdat,
                x=x, y=y,
                order=current_tuple,
                ax=ax_raw,
                alpha=alpha,
                palette=plotPal,
                size=rawMarkerSize,
                marker=rawMarkerType,
                **kwargs)

        if summaryBar is True:
            bar_raw = sns.barplot(
                x=summaries.index.tolist(),
                y=summaries.values,
                facecolor=summaryBarColor,
                ax=ax_raw,
                alpha=summaryBarAlpha)

        if floatContrast:
            # Get horizontal offset values.
            maxXBefore = max(sw.collections[0].get_offsets().T[0])
            minXAfter = min(sw.collections[1].get_offsets().T[0])

            xposAfter = maxXBefore + floatSwarmSpacer
            xAfterShift = minXAfter - xposAfter

            # shift the swarmplots
            offsetSwarmX(sw.collections[1], -xAfterShift)

            # Bars only exist when summaryBar is on; without this guard the
            # default arguments would hit an undefined `bar_raw`.
            if summaryBar is True:
                ## set maxSwarmSpan as the width of each bar.
                for i, bar in enumerate(bar_raw.patches):
                    centre = bar.get_x() + (bar.get_width() / 2.)
                    if i == 0:
                        bar.set_x(centre - maxSwarmSpan / 2.)
                    else:
                        bar.set_x(centre - xAfterShift - maxSwarmSpan / 2.)
                    bar.set_width(maxSwarmSpan)

            ## Set the ticks locations for ax_raw.
            ax_raw.xaxis.set_ticks((0, xposAfter))
            firstTick = ax_raw.xaxis.get_ticklabels()[0].get_text()
            secondTick = ax_raw.xaxis.get_ticklabels()[1].get_text()
            ax_raw.set_xticklabels([firstTick, secondTick],
                                   rotation=tickAngle,
                                   horizontalalignment=tickAlignment)

        if summaryLine is True:
            for i, m in enumerate(summaries):
                ax_raw.plot(
                    (i - summaryLineWidth,
                     i + summaryLineWidth),  # x-coordinates
                    (m, m),
                    color=summaryColour,
                    linestyle=summaryLineStyle)

        if show95CI is True:
            sns.barplot(
                data=plotdat,
                x=x, y=y,
                ax=ax_raw,
                alpha=0, ci=95)

        ax_raw.set_xlabel("")
        if floatContrast is False:
            fig.add_subplot(ax_raw)

        #### PLOT CONTRAST DATA.
        if len(current_tuple) == 2:
            # Plot the CIs on the contrast axes.
            plotbootstrap(sw.collections[1],
                          bslist=tempbs,
                          ax=ax_contrast,
                          violinWidth=violinWidth,
                          violinOffset=violinOffset,
                          markersize=summaryMarkerSize,
                          marker=summaryMarkerType,
                          offset=floatContrast,
                          color=violinColor,
                          linewidth=1)
            if floatContrast:
                # Set reference lines
                ## First get leftmost limit of left reference group
                xtemp, _ = np.array(sw.collections[0].get_offsets()).T
                leftxlim = xtemp.min()
                ## Then get leftmost limit of right test group
                xtemp, _ = np.array(sw.collections[1].get_offsets()).T
                rightxlim = xtemp.min()

                ## zero line
                ax_contrast.hlines(0,  # y-coordinates
                                   leftxlim, 3.5,  # x-coordinates, start and end.
                                   linestyle=contrastZeroLineStyle,
                                   linewidth=0.75,
                                   color=contrastZeroLineColor)

                ## effect size line
                ax_contrast.hlines(tempbs['summary'],
                                   rightxlim, 3.5,  # x-coordinates, start and end.
                                   linestyle=contrastEffectSizeLineStyle,
                                   linewidth=0.75,
                                   color=contrastEffectSizeLineColor)

                # Both the "positive" and "negative" cases lower the right
                # axis limits by the magnitude of the effect size.  Using
                # abs() gives the same numbers and also covers an effect
                # size of exactly 0, which used to leave the limits
                # undefined.
                shift = abs(float(tempbs['summary']))
                rawmin, rawmax = ax_raw.get_ylim()
                ax_contrast.set_ylim(rawmin - shift, rawmax - shift)

                if gsIdx > 0:
                    ax_contrast.set_ylabel('')

                align_yaxis(ax_raw, tempbs['statistic_ref'], ax_contrast, 0.)

            else:
                # Set bottom axes ybounds
                if contrastYlim is not None:
                    ax_contrast.set_ylim(contrastYlim)

                # Set xlims so everything is properly visible!
                swarm_xbounds = ax_raw.get_xbound()
                ax_contrast.set_xbound(
                    swarm_xbounds[0] - (summaryLineWidth * 1.1),
                    swarm_xbounds[1] + (summaryLineWidth * 1.1))

        else:
            # Plot the CIs on the bottom axes.
            plotbootstrap_hubspoke(
                bslist=bscontrast,
                ax=ax_contrast,
                violinWidth=violinWidth,
                violinOffset=violinOffset,
                markersize=summaryMarkerSize,
                marker=summaryMarkerType,
                linewidth=lineWidth)

        if floatContrast is False:
            fig.add_subplot(ax_contrast)

        if gsIdx > 0:
            ax_raw.set_ylabel('')
            ax_contrast.set_ylabel('')

    # Turn contrastList into a pandas DataFrame,
    contrastList = pd.DataFrame(contrastList).T
    contrastList.columns = contrastListNames

    ########
    axesCount = len(fig.get_axes())

    # Group sizes for the tick labels; the same for every axes, so it is
    # computed once.
    if showGroupCount:
        count = data.groupby(x).count()[y]

    ## Loop thru SWARM axes for aesthetic touchups.
    for i in range(0, axesCount, 2):
        axx = fig.axes[i]

        if i != axesCount - 2 and 'hue' in kwargs:
            # If this is not the final swarmplot, remove the hue legend.
            axx.legend().set_visible(False)

        if floatContrast is False:
            axx.xaxis.set_visible(False)
            sns.despine(ax=axx, trim=True, bottom=False, left=False)
        else:
            sns.despine(ax=axx, trim=True, bottom=True, left=True)

        if showAllYAxes is False:
            if i in range(2, axesCount):
                axx.yaxis.set_visible(showAllYAxes)
            else:
                # Draw back the lines for the relevant y-axes.
                # Not entirely sure why I have to do this.
                drawback_y(axx)

        # Add zero reference line for swarmplots with bars.
        if summaryBar is True:
            axx.add_artist(Line2D(
                (axx.xaxis.get_view_interval()[0],
                 axx.xaxis.get_view_interval()[1]),
                (0, 0),
                color='black', linewidth=0.75))

        # I don't know why the swarm axes controls the contrast axes ticks....
        if showGroupCount:
            newticks = list()
            for t in axx.xaxis.get_ticklabels():
                t_text = t.get_text()
                newticks.append(t_text + ' n=' + str(count[t_text]))
            axx.xaxis.set_ticklabels(newticks)

        if legend is False:
            axx.legend().set_visible(False)
        else:
            if i == axesCount - 2:  # the last (rightmost) swarm axes.
                # 'upper right' is the valid matplotlib location string;
                # 'top right' is not recognised.
                axx.legend(loc='upper right',
                           bbox_to_anchor=(1.1, 1.0),
                           fontsize=legendFontSize,
                           **legendFontProps)

    ## Loop thru the CONTRAST axes and perform aesthetic touch-ups.
    ## Get the y-limits:
    for j, i in enumerate(range(1, axesCount, 2)):
        axx = fig.get_axes()[i]

        if floatContrast is False:
            xleft, xright = axx.xaxis.get_view_interval()
            # Draw zero reference line.
            axx.hlines(y=0,
                       xmin=xleft - 1,
                       xmax=xright + 1,
                       linestyle=contrastZeroLineStyle,
                       linewidth=0.75,
                       color=contrastZeroLineColor)
            # reset view interval.
            axx.set_xlim(xleft, xright)

            if showAllYAxes is False:
                if i in range(2, axesCount):
                    axx.yaxis.set_visible(False)
                else:
                    # Draw back the lines for the relevant y-axes.
                    # Not entirely sure why I have to do this.
                    drawback_y(axx)

            sns.despine(ax=axx,
                        top=True, right=True,
                        left=False, bottom=False,
                        trim=True)

            # Rotate tick labels.
            rotateTicks(axx, tickAngle, tickAlignment)

        else:
            # Re-draw the floating axis to the correct limits.
            # Column j by position, then rows by label (replaces the
            # removed `.ix` indexer).
            contrast_j = contrastList.iloc[:, j]
            lower = np.min(contrast_j['diffarray'])
            upper = np.max(contrast_j['diffarray'])
            meandiff = contrast_j['summary']

            ## Make sure we have zero in the limits.
            if lower > 0:
                lower = 0.
            if upper < 0:
                upper = 0.

            ## Get the tick interval from the left y-axis.
            leftticks = fig.get_axes()[i - 1].get_yticks()
            tickstep = leftticks[1] - leftticks[0]

            ## First re-draw of axis with new tick interval
            axx.yaxis.set_major_locator(MultipleLocator(base=tickstep))
            newticks1 = axx.get_yticks()

            ## Obtain major ticks that comfortably encompass lower and upper.
            newticks2 = [b for b in newticks1 if lower <= b <= upper]
            # if the meandiff falls outside of the newticks2 set, add a tick
            # in the right direction.
            if np.max(newticks2) < meandiff:
                # find out the max tick index in newticks1.
                ind = np.where(newticks1 == np.max(newticks2))[0][0]
                newticks2.append(newticks1[ind + 1])
            elif meandiff < np.min(newticks2):
                # find out the min tick index in newticks1.
                ind = np.where(newticks1 == np.min(newticks2))[0][0]
                newticks2.append(newticks1[ind - 1])
            newticks2 = np.array(newticks2)
            newticks2.sort()

            ## Second re-draw of axis to shrink it to desired limits.
            axx.yaxis.set_major_locator(FixedLocator(locs=newticks2))

            ## Despine the axes.
            sns.despine(ax=axx, trim=True,
                        bottom=False, right=False,
                        left=True, top=True)

    # Normalize bottom/right Contrast axes to each other for Cummings
    # hub-and-spoke plots.
    if (axesCount > 2 and
            contrastShareY is True and
            floatContrast is False):
        # NOTE(review): the overall bca_ci_low/bca_ci_high range used to be
        # computed here but was never passed on, so that dead code has been
        # removed.  Confirm whether it was meant to be the default
        # `contrast_ylim` when `contrastYlim` is None.
        normalizeContrastY(fig,
                           contrast_ylim=contrastYlim,
                           show_all_yaxes=showAllYAxes)

    # Zero gaps between plots on the same row, if floatContrast is False
    if (floatContrast is False and showAllYAxes is False):
        gsMain.update(wspace=0.)
    else:
        # Tight Layout!
        gsMain.tight_layout(fig)

    # And we're all done.
    rcdefaults()  # restore matplotlib defaults.
    sns.set()  # restore seaborn defaults.
    return fig, contrastList