def dmr_cluster_count_by_class(
        blocks,
        show_labels=True,
        set_labels=None,
        outdir=None,
        figname="dmr_cluster_count_by_class",
        ax=None
):
    """
    Draw a Venn diagram of how cluster IDs are shared between the classes of a single result.

    :param blocks: Dictionary keyed by class; each value is an iterable of IDs. One Venn set is drawn per entry, in
    the iteration order of the dictionary.
    :param show_labels: If False, no set labels are passed to the Venn diagram (this overrides `set_labels`).
    :param set_labels: Labels for the sets, in the same order as `blocks`. Defaults to the keys of `blocks`.
    :param outdir: If supplied, the figure is written there as <figname>.png (200 dpi) and <figname>.pdf.
    :param figname: File name stem used when saving.
    :param ax: Axes to draw into. If None, a new figure containing a single axes is created.
    :return: The axes holding the diagram.
    """
    own_figure = ax is None
    if own_figure:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    # split the mapping into parallel tuples: class names and their ID collections
    class_names, id_sets = zip(*blocks.items())

    if not show_labels:
        labels = None
    elif set_labels is None:
        labels = class_names
    else:
        labels = set_labels

    venn.venn_diagram(*id_sets, ax=ax, set_labels=labels)

    if own_figure:
        # only touch the layout of a figure created here; a caller-supplied axes is left alone
        fig.tight_layout()

    if outdir is not None:
        target = ax.figure
        target.savefig(os.path.join(outdir, "%s.png" % figname), dpi=200)
        target.savefig(os.path.join(outdir, "%s.pdf" % figname))

    return ax
def dmr_overlap(
        dmr_results,
        pids=None,
        comparisons=('matched', 'gibco'),
        comparison_titles=('Isogenic', 'Reference'),
        outdir=None,
        figname='dmr_individual_overlap'
):
    """
    Plot Venn diagrams showing the overlap of DMRs between individuals.
    One panel is drawn per comparison; the default is two comparisons (isogenic and reference).

    :param dmr_results: Nested mapping indexed as dmr_results[pid][comparison]. Each entry is passed to the Venn
    diagram as one set.
    :param pids: Iterable of the individuals to include. Defaults to all keys of `dmr_results`.
    :param comparisons: Iterable of comparison keys, one panel per entry.
    :param comparison_titles: Panel titles, same length and order as `comparisons`.
    :param outdir: If supplied, the figure is written there as <figname>.png (200 dpi) and <figname>.pdf.
    :param figname: Used as the matplotlib figure identifier (`num`) and as the file name stem.
    :return: None
    :raises AttributeError: If `comparisons` and `comparison_titles` differ in length.
    """
    if len(comparisons) != len(comparison_titles):
        raise AttributeError("Length of comparisons and comparison_titles must be equal.")

    if pids is None:
        pids = dmr_results.keys()
    # Materialise once: pids is iterated for every comparison and also used as the labels, so it must be
    # re-iterable and keep a fixed order (a generator or a Python 3 dict view would not do).
    pids = list(pids)

    # squeeze=False keeps axs two-dimensional, so indexing also works when there is only one comparison
    fig, axs = plt.subplots(1, len(comparisons), num=figname, squeeze=False)

    for i, (comparison, title) in enumerate(zip(comparisons, comparison_titles)):
        inputs = [dmr_results[pid][comparison] for pid in pids]
        this_ax = axs[0, i]
        venn.venn_diagram(*inputs, set_labels=pids, ax=this_ax)
        this_ax.set_title(title)

    fig.tight_layout()

    if outdir is not None:
        fig.savefig(os.path.join(outdir, "%s.png" % figname), dpi=200)
        fig.savefig(os.path.join(outdir, "%s.pdf" % figname))
def venn_dmr_counts(
        dmr_results,
        pids=None,
        outdir=None,
        figname='dmr_venn',
        comparisons=('matched', 'gibco'),
        set_labels=('Isogenic', 'Reference')
):
    """
    For each supplied patient (top level of keys in the supplied results), generate 3 Venn diagrams
    - all DMR count
    - hypermethylated DMR counts (median_change > 0)
    - hypomethylated DMR counts (median_change < 0)
    In each case, splitting according to `comparisons`. Patients are laid out in columns, the three diagram types
    in rows.

    :param dmr_results: Dictionary of results, e.g. from the results (results_significant) attribute of DmrResults.
    Indexed as dmr_results[pid][comparison]; each entry is a dictionary keyed by DMR ID whose values have a
    'median_change' entry.
    :param pids: Iterable of patients to include. Defaults to all keys of `dmr_results`.
    :param outdir: If supplied, the figure is written there as <figname>.png (200 dpi) and <figname>.pdf.
    :param figname: Used as the matplotlib figure identifier (`num`) and as the file name stem.
    :param comparisons: Iterable giving the comparisons to use.
    :param set_labels: Iterable giving the set labels to use for the plot. Corresponds to comparisons. Labels are
    only drawn in the first column.
    :return: None
    :raises AttributeError: If `set_labels` and `comparisons` differ in length.
    """
    if len(set_labels) != len(comparisons):
        raise AttributeError("set_labels must be the same length as comparisons")

    if pids is None:
        pids = dmr_results.keys()
    # a list has a length and a fixed order whatever iterable was supplied
    pids = list(pids)

    # isogenic vs reference in each patient, all, hyper and hypomethylated
    # squeeze=False keeps axs two-dimensional, so axs[row, col] also works for a single patient
    fig, axs = plt.subplots(3, len(pids), figsize=(11, 7), num=figname, squeeze=False)

    for i, pid in enumerate(pids):
        inputs = [dmr_results[pid][k] for k in comparisons]
        inputs_all = [set(x.keys()) for x in inputs]
        inputs_up = [{k for k, v in x.items() if v['median_change'] > 0} for x in inputs]
        inputs_down = [{k for k, v in x.items() if v['median_change'] < 0} for x in inputs]

        # label the sets once, in the leftmost column only
        sl = set_labels if i == 0 else None
        for row, sets in enumerate((inputs_all, inputs_up, inputs_down)):
            venn.venn_diagram(*sets, set_labels=sl, ax=axs[row, i])
        axs[0, i].set_title("GBM%s" % pid)

    fig.tight_layout()

    if outdir is not None:
        # both formats share the same stem, matching the other plotting functions in this module
        fig.savefig(os.path.join(outdir, "%s.png" % figname), dpi=200)
        fig.savefig(os.path.join(outdir, "%s.pdf" % figname))
def plot_venn_de_directions(
        logfc,
        set_colours_dict,
        ax=None,
        set_labels=('Hypo', 'Hyper'),
        fontsize=16
):
    """
    Venn diagram of the consistently changed features in each set. The label of each set-specific region is
    extended with the number of its members that go up and the number that go down.

    :param logfc: Mapping keyed by set label. Each value is a DataFrame with a 'consistent' column (interpreted as
    boolean); only rows where it is true are used. The remaining columns are averaged row-wise to decide the
    direction, so they are presumably numeric logFC values - confirm against the caller.
    :param set_colours_dict: Mapping from set label to the colour used for that set.
    :param ax: Axes to draw into. If None, a new 5 x 3.3 figure is created.
    :param set_labels: Keys of `logfc` to plot, also used as the set labels. The shared region is looked up under
    the key '11', so two sets are expected.
    :param fontsize: Font size applied to every text artist found in the axes.
    :return: The axes holding the diagram.
    """
    if ax is None:
        fig = plt.figure(figsize=(5., 3.3))
        ax = fig.add_subplot(111)

    # rows flagged as consistent, one frame per set (computed once and reused below)
    consistent = [logfc[k].loc[logfc[k]['consistent'].astype(bool)] for k in set_labels]

    vv, vs, _ = venn.venn_diagram(
        *[df.index for df in consistent],
        set_labels=set_labels,
        set_colors=[set_colours_dict[t] for t in set_labels],
        ax=ax
    )
    ax.figure.tight_layout()

    # modify labels based on direction
    specific_ids = setops.specific_sets(set_labels)
    in_both = vs['11']

    for k, df in zip(set_labels, consistent):
        # Drop the 'consistent' flag before averaging: it is 1.0 on every remaining row and would otherwise pull
        # the mean (and hence the inferred direction) upwards.
        values = df.drop('consistent', axis=1).astype(float)
        direction = np.sign(values.mean(axis=1))

        n_up = len(direction.index[direction > 0].difference(in_both))
        n_down = len(direction.index[direction < 0].difference(in_both))

        # get the corresponding label
        lbl = vv.get_label_by_id(specific_ids[k])
        if lbl is None:
            # NOTE(review): matplotlib_venn-style diagrams return None for a region that was not drawn;
            # confirm this is also the case for the object returned by venn.venn_diagram
            continue

        ## FIXME: font family seems to change from old text to new - related to LaTeX rendering?
        lbl.set_text(
            lbl.get_text()
            + '\n' + r'$%d\uparrow$' % n_up
            + '\n' + r'$%d\downarrow$' % n_down
        )

    plt.setp(common.get_children_recursive(ax, type_filt=plt.Text), fontsize=fontsize)

    return ax
all_gs_dict[('mTOR', )] = mtor_geneset for_export = pd.DataFrame(index=range( max((len(t) for t in all_gs_dict.values()))), columns=[]) for k, v in all_gs_dict.items(): the_key = '_'.join(k) for_export.loc[range(len(v)), the_key] = sorted(v) for_export.fillna('', inplace=True) for_export = for_export.sort_index(axis=1) for_export.to_excel(os.path.join(outdir, "gene_sets.xlsx"), index=False) # Venn diagram showing various mTOR signature options and overlap between them fig, ax = plt.subplots() venn.venn_diagram(*mtor_gs_dict.values(), set_labels=mtor_gs_dict.keys(), ax=ax) fig.tight_layout() ax.set_facecolor('w') fig.savefig(os.path.join(outdir, "venn_mtor_genesets.png"), dpi=200) basedir = os.path.join(HGIC_LOCAL_DIR, 'current/input_data/tcga') brennan_s7_fn = os.path.join(basedir, "brennan_s7.csv") brennan_s7 = pd.read_csv(brennan_s7_fn, header=0, index_col=0) if rnaseq_type == 'counts': rnaseq_dat_fn = os.path.join(basedir, 'rnaseq.xlsx') rnaseq_meta_fn = os.path.join(basedir, 'rnaseq.meta.xlsx') sheet_name = 'htseq' wang_fn = os.path.join(basedir, 'wang_classification',
'methylation.450k.meta.csv'), 'meth_27k': os.path.join(DATA_DIR, 'methylation', 'tcga_gbm', 'primary_tumour', 'methylation.27k.meta.csv'), } meta = {} for k, fn in meta_fn.items(): meta[k] = pd.read_csv(fn, header=0, index_col=0) # microarray only fig = plt.figure() ax = fig.add_subplot(111) venn.venn_diagram(meta['marr_u133'].case_id, meta['marr_agilent1'].case_id, meta['marr_agilent2'].case_id, set_labels=('U133', 'Agilent 1', 'Agilent 2'), ax=ax) plt.tight_layout(rect=(0, 0, 1., 1.05)) plt.savefig(os.path.join(outdir, "venn_by_caseid_microarray.png"), dpi=200) # rnaseq / methylation fig = plt.figure() ax = fig.add_subplot(111) venn.venn_diagram(meta['rnaseq'].index, meta['meth_450k'].index, meta['meth_27k'].index, set_labels=('RNA-Seq', 'Methylation 450K', 'Methylation 27K'), ax=ax) plt.tight_layout(rect=(0, 0, 1.05, 1.05))
de_in_all = reference_genomes.ensembl_to_gene_symbol( setops.reduce_intersection( *[t.index for t in de_res_separate.values()])) # sort this by the avg logFC logfc_in_all = pd.DataFrame.from_dict( dict([(p, v.loc[de_in_all.index, 'logFC']) for p, v in de_res_separate.items()])) logfc_in_all = logfc_in_all.loc[logfc_in_all.mean( axis=1).abs().sort_values(ascending=False).index] general.add_gene_symbols_to_ensembl_data(logfc_in_all) fig = plt.figure(figsize=(5, 5)) ax = fig.add_subplot(111, facecolor='w') venn.venn_diagram(set_labels=de_res_separate.keys(), *[t.index for t in de_res_separate.values()], ax=ax) fig.tight_layout() fig.savefig(os.path.join(outdir, "de_nsc_ss2-polya_combined_dispersion.png"), dpi=200) fig, axs = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True, figsize=(10, 10)) fig.tight_layout() for i, p in enumerate(pids): ax = axs.flat[i] sp = sample_pairs[p]
# Load the remaining comparison tables; the first column of each file becomes the index.
gl261_bmdm_fn = os.path.join(indir, 'gl261_bmdm_vs_healthy_monocyte.csv')
gl261_bmdm = pd.read_csv(gl261_bmdm_fn, header=0, index_col=0)

gemm_mg_fn = os.path.join(indir, 'gemm_mg_vs_healthy_mg.csv')
gemm_mg = pd.read_csv(gemm_mg_fn, header=0, index_col=0)

gemm_bmdm_fn = os.path.join(indir, 'gemm_bmdm_vs_healthy_monocyte.csv')
gemm_bmdm = pd.read_csv(gemm_bmdm_fn, header=0, index_col=0)

# Overlap of the row labels between the two MG tables
fig, ax = plt.subplots()
v, sets, counts = venn.venn_diagram(
    gl261_mg.index,
    gemm_mg.index,
    set_labels=("GL261 MG", "GEMM MG"),
    ax=ax
)
fig.tight_layout()
fig.savefig(os.path.join(outdir, "gl261_mg-gemm_mg.png"), dpi=200)
fig.savefig(os.path.join(outdir, "gl261_mg-gemm_mg.pdf"))

# Overlap of the row labels between the two BMDM tables
fig, ax = plt.subplots()
v, sets, counts = venn.venn_diagram(
    gl261_bmdm.index,
    gemm_bmdm.index,
    set_labels=("GL261 BMDM", "GEMM BMDM"),
    ax=ax
)
fig.tight_layout()
fig.savefig(os.path.join(outdir, "gl261_bmdm-gemm_bmdm.png"), dpi=200)
fig.savefig(os.path.join(outdir, "gl261_bmdm-gemm_bmdm.pdf"))
vert=True, patch_artist=True, medianprops=medianprops, widths=0.7) ax.set_xticks([]) for p, c in zip(bplot['boxes'], colours): p.set_facecolor(c) # add third column with Venn diagrams ax = fig.add_subplot(gs[ax_i + 1, 2], facecolor='none', frame_on=False, xticks=[], yticks=[]) v = venn.venn_diagram(*u_hypo.values(), ax=ax, set_labels=u_hypo.keys(), set_colors=[colours[1]] * 2) ax = fig.add_subplot(gs[ax_i, 2], facecolor='none', frame_on=False, xticks=[], yticks=[]) v = venn.venn_diagram(*u_hyper.values(), ax=ax, set_labels=u_hyper.keys(), set_colors=[colours[0]] * 2, alpha=0.7) for i, (k1, obj) in enumerate(to_plot.items()): axs[i, 0].set_ylabel(k1)
cols = [] for k, nm in zip(['res_1', 'res_2', 'res_4'], ['original', 'original_filter', 'group_dispersion']): for m in methods: cols.append("%s_%s" % (nm, m)) n_pair_only_intersect = pd.DataFrame(index=pids, columns=cols) for k, nm in zip(['res_1', 'res_2', 'res_4'], ['original', 'original_filter', 'group_dispersion']): for m in methods: this_res = to_save[k][m] # number of DE genes in Venn diagrams fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(10, 8)) for i, pid in enumerate(pids): # number DE in the refs ax = axs.flat[i] venn.venn_diagram(*[this_res[pid][t].index for t in ['iNSC'] + refs], set_labels=['iNSC'] + refs, ax=ax) ax.set_title(pid, fontsize=16) for i in range(len(pids), 12): ax = axs.flat[i] ax.set_visible(False) fig.subplots_adjust(left=0.02, right=0.98, bottom=0.02, top=0.95) fig.savefig(os.path.join(outdir, "number_de_genes_ref_comparison_%s_%s.png" % (nm, m)), dpi=200) # number of PO genes in Venn diagrams fig, axs = plt.subplots(nrows=3, ncols=4, figsize=(10, 6)) for i, pid in enumerate(pids): a = this_res[pid]['iNSC'].index po = [] for ref in refs: b = this_res[pid][ref].index vs, vc = setops.venn_from_arrays(a, b)
# Venn diagrams (two ESC studies) s1 = ['_'.join(t) for t in zip(pids, ['ours'] * len(pids))] s2 = ['_'.join(t) for t in zip(['N2', '50'], ['kogut'] * 2)] fig, axs = plt.subplots(ncols=3, nrows=3) i = 0 for s in s1 + s2: this_arr = [] for r in ref_labels: k = ('iPSC_%s' % s, 'ESC_%s' % r) if k in de_res_sign: this_arr.append(de_res_sign[k]) if len(this_arr): print "Found comparison %s" % s venn.venn_diagram(*[t.index for t in this_arr], ax=axs.flat[i], set_labels=ref_labels) axs.flat[i].set_title(s) i += 1 else: print "No comparison %s" % s for j in range(i, axs.size): axs.flat[i].axis('off') fig.subplots_adjust(left=0.05, right=0.98, bottom=0.05, top=0.95) fig.savefig(os.path.join(outdir, "ipsc_esc_venn.png"), dpi=200) # core DE genes ipsc_vs_esc = dict([(k, v) for k, v in de_res_sign.items() if re.search('|'.join(ref_labels), k[1])
legend=False, ax=ax) if i == 0: ax.set_ylabel('% DMRs') else: ax.yaxis.set_visible(False) fig.tight_layout() fig.savefig(os.path.join(outdir, "dmr_direction_all_groups.png"), dpi=200) fig.savefig(os.path.join(outdir, "dmr_direction_all_groups.tiff"), dpi=200) # is there much overlap in the gene sets between the two groups? fig = plt.figure(figsize=(5, 3)) ax = fig.add_subplot(111) venn.venn_diagram(genes_from_dmr_groups['Hyper'], genes_from_dmr_groups['Hypo'], set_labels=['Hyper', 'Hypo'], ax=ax) fig.tight_layout() fig.savefig(os.path.join(outdir, "genes_from_dmr_groups_venn.png"), dpi=200) # no, but if we look at the intersection genes, are they in different directions (DE) between the two groups? groups_inv = dictionary.complement_dictionary_of_iterables(groups, squeeze=True) in_both = setops.reduce_intersection(*genes_from_dmr_groups.values()) in_both_ens = reference_genomes.gene_symbol_to_ensembl(in_both) # some of these will have no DE results tmp = {} for pid in pids: tmp[pid] = de_res_s1[pid].reindex(in_both_ens).logFC.dropna()