def paired_boxplot_tumor_normal(df, sig=True, cutoffs=(.01, .00001),
                                order=None, ax=None):
    """
    Draws a paired boxplot given a DataFrame with both tumor and normal
    samples on the index. '01' and '11' are hard-coded as the ids for
    tumor/normal.

    df: DataFrame whose rows carry a two-level index. Level 1 holds the
        sample-type code ('01' tumor, '11' normal); level 0 is the pairing
        key (presumably a patient id -- confirm with callers). Each column
        becomes one tumor/normal pair of boxes.
    sig: when true, columns whose paired t-test p-value falls under the
        cutoffs are flagged with '*' / '**' markers and the legend gains
        the matching entries.
    cutoffs: (loose, strict) p-value thresholds. p < strict gets '**';
        strict <= p < loose gets '*'. The legend text is fixed at
        'p<10^-2' / 'p<10^-5' whatever thresholds are passed.
    order: optional column selection/order. When None the columns are
        sorted by the median of the normal ('11') samples, largest first.
    ax: forwarded to paired_boxplot (presumably the axes to draw on).

    Returns nothing; the plot is drawn on the axes handed back by
    paired_boxplot.
    """
    # Keep only the level-0 groups that have exactly two rows.
    # NOTE(review): this checks the row count only, not that the two rows
    # are one '01' and one '11'.
    group_sizes = df.groupby(level=0).size()
    paired_keys = group_sizes[group_sizes == 2].index
    df = df.loc[paired_keys]

    if order is None:
        normal_medians = df.xs('11', level=1).median()
        # Ascending stable argsort, then reversed -> largest median first.
        ascending = normal_medians.values.argsort(kind='mergesort')
        df = df[normal_medians.index[ascending][::-1]]
    else:
        df = df[order]

    # One array per column for each sample type, interleaved as
    # tumor, normal, tumor, normal, ...
    tumor_cols = list(df.xs('01', level=1).values.T)
    normal_cols = list(df.xs('11', level=1).values.T)
    boxes = [col for pair in zip(tumor_cols, normal_cols) for col in pair]
    ax1, bp = paired_boxplot(boxes, ax)

    def paired_test(column):
        # Unstack once so tumor and normal values line up on level 0.
        wide = column.unstack()
        return Stats.ttest_rel(wide['01'], wide['11'])

    res = df.apply(paired_test).T
    p = res.p

    handles = list(bp['boxes'][:2])
    labels = ['Tumor', 'Normal']

    if not sig:
        ax1.legend(handles, labels, loc='best')
    else:
        loose, strict = cutoffs[0], cutoffs[1]

        # Markers sit at a fixed height of 18 and at x = 3.5 * i + 0.5.
        # NOTE(review): assumes paired_boxplot places pair i around that x
        # position and that 18 is inside the y-range -- confirm.
        strict_pts = [(i * 3.5 + .5, 18) for i, pval in enumerate(p)
                      if pval < strict]
        # The upper bound is exclusive and the lower bound inclusive so a
        # p-value exactly equal to the strict cutoff still gets a marker.
        loose_pts = [(i * 3.5 + .5, 18) for i, pval in enumerate(p)
                     if strict <= pval < loose]

        # The strict tier is drawn first, matching the original call order.
        strict_mark = None
        if strict_pts:
            strict_mark = ax1.scatter(*zip(*strict_pts), marker='$**$',
                                      label='$p<10^{-5}$', s=200)
        loose_mark = None
        if loose_pts:
            loose_mark = ax1.scatter(*zip(*loose_pts), marker='$*$',
                                     label='$p<10^{-2}$', s=30)

        # Only list tiers that were actually drawn, so that no None
        # placeholder is passed to legend().
        if loose_mark is not None:
            handles.append(loose_mark)
            labels.append('$p<10^{-2}$')
        if strict_mark is not None:
            handles.append(strict_mark)
            labels.append('$p<10^{-5}$')

        ax1.legend(handles, labels, loc='best', scatterpoints=1)

    ax1.set_xticklabels(df.columns)