kla_only_no_p65 = sum((gr_group['gr_dex_tag_count'] <= min_tags) & (gr_group['gr_kla_dex_tag_count'] > min_tags) & (gr_group['p65_kla_tag_count'] + gr_group['p65_kla_dex_tag_count'] <= min_tags) ) counts = [tethered, direct_comp_gr, indirect_comp_gr, direct_comp_p65, cobound, direct_novel, indirect_novel, in_dex_no_p65, kla_only_no_p65] else: counts = [0]*9 stats = stats + counts all_stats.append(dict(zip(labels, stats))), index.append(group.name) grouped.apply(count_enhancers) # There must be a better way to do this group-apply, but I can't make it turn back into a DF... enhancer_counts = DataFrame(all_stats, index=index) spaced_labels = ['\n'.join(map(' '.join, [l.split()[i:i+2] for i in xrange(0,len(l.split()),2)] )) for l in labels] erna_title = 'Enhancers per Gene by enhancer subtype {0}'.format(name) ax = yzer.boxplot([enhancer_counts[col] for col in labels], spaced_labels, title=erna_title, xlabel='Subset', ylabel='Count', show_outliers=False, show_plot=False, wide=True ) yzer.ylim(ax, -1, 2) pyplot.setp(ax.get_xticklabels(), fontsize=10) yzer.save_plot(yzer.get_filename(img_dirpath, erna_title + '.png')) yzer.show_plot()
'{}_{}'.format(peptide, ab)) filename = yzer.get_filename( pep_dirpath, '{}_{}_enhancers_batf.txt'.format(peptide, ab)) data = yzer.import_file(filename) data = data.fillna(0) subset = data[data['no_pep_tag_count'] == 0] #subset = data[data['tag_count(2)'] == 0] #subset = subset[subset['tag_count(3)'] == 0] all_tag_counts.append(data['tag_count']) de_novo_tag_counts.append(subset['tag_count']) # Plot as boxplot ax = yzer.boxplot(all_tag_counts, conditions, title='Tag Counts in {} Enhancers'.format( ab.title()), xlabel='Condition', ylabel='Normalized tag count', save_dir=savepath, show_plot=True) ax = yzer.boxplot(de_novo_tag_counts, conditions, title='Tag Counts in de novo {} Enhancers'.format( ab.title()), xlabel='Condition', ylabel='Normalized tag count', save_dir=savepath, show_plot=True)
fold >= wt_data['tag_count']) & (wt_data['tag_count'] * fold >= wt_data['foxo1_ko_naive_atac_tag_count'])] ko_only = ko_data[ko_data['naive_atac_tag_count'] < min_thresh] save_path = yzer.get_and_create_path(dirpath, 'Figures', 'Foxo1_group_overlaps') groups = [wt_only, both, ko_only] labels = ['WT only', 'WT and KO', 'Foxo1 KO only'] if True: yzer.boxplot([gp['naive_foxo1_tag_count'] for gp in groups], labels, title='Foxo1 tags in ATAC-seq regions by group', ylabel='Foxo1 peak tag count', save_dir=save_path, show_plot=False) yzer.boxplot([gp['lcmv_d12_foxo1_tag_count'] for gp in groups], labels, title='LCMV d12 Foxo1 tags in ATAC-seq regions by group', ylabel='Foxo1 peak tag count', save_dir=save_path, show_plot=False) yzer.boxplot([gp['naive_h3k4me2_tag_count'] for gp in groups], labels, title='H3K4me2 tags in ATAC-seq regions by group', ylabel='H3K4me2 region count', save_dir=save_path, show_plot=False) yzer.boxplot(
# Build the four comparison groups, down-sample each to the size of `nearby`,
# and draw one boxplot of `colname` per group plus `nearby`.
groups = [data[none], data[kla_gt], data[nc], data[kla_dex_gt]]

# We want to randomly sample to get equi-sized groups.
# NOTE(review): random.sample raises ValueError if a group has fewer than
# `desired` rows -- confirm every group is at least as large as `nearby`.
desired = len(nearby)
for i, g in enumerate(groups):
    rows = random.sample(g.index, desired)
    # NOTE(review): `.ix` is deprecated in newer pandas; `rows` holds index
    # labels, so `.loc` looks like the equivalent -- confirm before upgrading.
    groups[i] = g.ix[rows]
to_plot = [g[colname] for g in (groups + [nearby])]

# Choose the title and y-axis label together, once, instead of overwriting the
# title afterwards and selecting the label with an `and`/`or` chain.
sampled_suffix = '\nRefSeq, randomly sampled to {0} transcripts'.format(desired)
if pausing:
    title = 'Pausing Ratio Ratio by change in p65:' + sampled_suffix
    y_axis_label = 'PausingRatio(KLA+Dex)/PausingRatio(KLA)'
else:
    title = 'LFC in KLA + Dex over KLA by change in p65:' + sampled_suffix
    y_axis_label = 'log2(KLA+Dex GRO-seq/KLA GRO-seq)'

ax = yzer.boxplot(to_plot, names,
                  title=title,
                  xlabel='Transcript Status',
                  ylabel=y_axis_label,
                  show_outliers=False, show_plot=False)

# A random number is embedded in the file name, so repeated runs (each with a
# different random sample) write to different file names.
yzer.save_plot(
    yzer.get_filename(
        img_dirpath,
        '{2}_with_nearby_unique_{0}x_{3}_sampled_{1}.png'.format(
            ratio, random.randint(0, 9999), colname, change_type)))
yzer.show_plot()
# Interaction counts for the "KLA 4h, with me2" subset, followed by one zero
# for each of the `zero_intxns_in_kla_4h_with_me2` entries.
kla_4h_with_me2_frame = data['interacting_in_kla_4h_with_me2']
kla_4h_with_me2_counts = (
    kla_4h_with_me2_frame['count'].values.tolist()
    + [0] * zero_intxns_in_kla_4h_with_me2)

# Each box is described by (base label, values); keeping the two side by side
# makes the label/value pairing explicit.
subset_boxes = [
    ('Less than 1/4\navg H3K4me2 in notx,\ninteractions in notx',
     notx_with_less_me2_counts),
    ('At least avg H3K4me2 in notx\ninteractions in notx',
     notx_with_me2_counts),
    ('Less than 1/4\navg H3K4me2 in notx,\ninteractions in KLA 30m',
     kla_30m_with_less_me2_counts),
    ('At least avg H3K4me2 in notx\ninteractions in KLA 30m',
     kla_30m_with_me2_counts),
    ('Less than 1/4\navg H3K4me2 in notx,\ninteractions in KLA 4h',
     kla_4h_with_less_me2_counts),
    ('At least avg H3K4me2 in notx\ninteractions in KLA 4h',
     kla_4h_with_me2_counts),
]

vals = [box_values for box_label, box_values in subset_boxes]

# Append the number of values in each box to its label.
labels = [
    box_label + '\n(count: {0})'.format(len(box_values))
    for box_label, box_values in subset_boxes
]

title = 'Number of interactions with "enhancers" by H3K4me2 state in notx'
ax = yzer.boxplot(
    vals, labels,
    title=title,
    xlabel='Enhancer subset',
    ylabel='Number of interactions with other transcripts',
    show_outliers=False,
    show_plot=True,
    wide=True,
    save_dir=img_dirpath)
for group in (me2_only, ctcf_only, k27_only, ctcf_me2, k27_me2, nothing): print len(group), len(group)/len(data) if True: ax = yzer.boxplot([data['rpkm'], me2_tf['rpkm'], ctcf_tf['rpkm'], k27_tf['rpkm'], ctcf_me2_tf['rpkm'], k27_me2_tf['rpkm'], tf['rpkm'], me2_only['rpkm'], ctcf_only['rpkm'], k27_only['rpkm'], ctcf_me2['rpkm'], k27_me2['rpkm'], nothing['rpkm']], bar_names=['All Potential\nEnhancers', 'me2 + TF', 'CTCF + TF', 'K27 + TF', 'CTCF + me2\n+ TF', 'K27 + me2\n+ TF', 'TF', 'me2 only', 'CTCF only', 'K27 only', 'CTCF + me2', 'K27 + me2', 'No peaks',], title='GRO-seq RPKM at non-genic H3K4me2 regions', xlabel='', ylabel='Tags per 1000bp in GRO-seq transcript overlapping H3K4me2 peak', show_outliers=False, show_plot=False) yzer.save_plot(os.path.join(dirpath, 'groseq_rpkm_at_h3k4me2_peaks.png')) yzer.show_plot() ''' -- With H3K27me3 select distinct on (e.id) e.*, e.id as me2, reg.id as refseq, p1.id as pu_1, p2.id as cebpa,
grapher.show_plot() if True: # Boxplots: avg PU.1 in Bl6 for whole set; avg PU.1 in BALB for whole set; # avg PU.1 for NOD in whole set; avg PU.1 in NOD set with Bl6; avg PU.1 in NOD set with BALB ax = grapher.boxplot( [ data['wt_pu_1_tag_count'], data['balb_pu_1_tag_count_norm'], data['nod_pu_1_tag_count_norm'], nod_with_bl6['nod_pu_1_tag_count_norm'], nod_with_balb['nod_pu_1_tag_count_norm'] ], bar_names=[ 'C57Bl6 Peaks', 'BALBc Peaks', 'NOD Peaks', 'NOD Peaks\nwhere\nNOD == C57Bl6', 'NOD Peaks\nwhere\nNOD == BALBc', ], title= 'PU.1 peak tags where BALBc has a SNP that ruins its PU.1 Motif', xlabel='', ylabel='Tags per PU.1 peak', show_outliers=False, show_plot=False) grapher.save_plot( os.path.join(dirpath, 'peak_boxplots_no_balb_motif_filter_low_peaks.png')) grapher.show_plot() print 'p-val that BALBc is different than C57Bl6: %g' % ttest_ind(
# Figures comparing ATAC-seq and H3K4me2 regions; each call below is given
# `save_dir=save_path`.
save_path = yzer.get_and_create_path(dirpath, 'Figures', 'me2_atac_overlaps')

# Group labels shared by the pie chart and boxplot of each data set. A fresh
# list is passed to every call, as in the original literals.
atac_group_names = ('ATAC only', 'ATAC with H3K4me2')
me2_group_names = ('H3K4me2 only', 'H3K4me2 with ATAC')

# Axis text shared by the paired plots.
peak_count_label = 'Peak tag count'
hist_xlabel = 'Tag count in peak'
hist_ylabel = 'Number of peaks'

# Pie charts: how many regions fall in each group.
atac_sizes = [len(atac_only), len(atac_me2)]
yzer.piechart(atac_sizes, list(atac_group_names),
              title='ATAC-seq region overlaps',
              save_dir=save_path)
me2_sizes = [len(me2_only), len(me2_atac)]
yzer.piechart(me2_sizes, list(me2_group_names),
              title='H3K4me2 overlaps',
              save_dir=save_path)

# Boxplots: `tag_count` values of each group.
atac_tag_counts = [atac_only['tag_count'], atac_me2['tag_count']]
yzer.boxplot(atac_tag_counts, list(atac_group_names),
             title='ATAC-seq tag counts by H3K4me2 overlap',
             xlabel='Group', ylabel=peak_count_label,
             save_dir=save_path)
me2_tag_counts = [me2_only['tag_count'], me2_atac['tag_count']]
yzer.boxplot(me2_tag_counts, list(me2_group_names),
             title='H3K4me2 tag counts by ATAC-seq overlap',
             xlabel='Group', ylabel=peak_count_label,
             save_dir=save_path)

# Histograms: `tag_count` values of the regions found in only one data set.
yzer.histogram(atac_only['tag_count'].tolist(),
               bins=20,
               title='ATAC-seq-only peak tag count distribution',
               xlabel=hist_xlabel, ylabel=hist_ylabel,
               save_dir=save_path)
yzer.histogram(me2_only['tag_count'].tolist(),
               bins=20,
               title='H3K4me2-only peak tag count distribution',
               xlabel=hist_xlabel, ylabel=hist_ylabel,
               save_dir=save_path)
dirpath, 'balbc_vs_nod_pu_1_peak_tag_counts_bl6_gt_balb_unique.png')) grapher.show_plot() if True: # Boxplots ax = grapher.boxplot( [ data['wt_tag_count'], data['balb_tag_count_norm'], data['nod_tag_count_norm'], nod_with_bl6['nod_tag_count_norm'], nod_with_balb['nod_tag_count_norm'] ], bar_names=[ 'C57Bl6 Tags', 'BALBc Tags', 'NOD Tags', 'NOD Tags\nwhere\nNOD == C57Bl6', 'NOD Tags\nwhere\nNOD == BALBc', ], title='GRO-seq tags where BALBc has a SNP and half H3K4me2', xlabel='', ylabel='Tags in transcript at H3K4me2 peak', show_outliers=False, show_plot=False) grapher.save_plot( os.path.join(dirpath, 'peak_boxplots_all_h3k4me2_collapsed.png')) grapher.show_plot() if True: print 'p-val that BALBc is different than C57Bl6: %g' % ttest_ind( data['wt_tag_count'], data['balb_tag_count_norm'])[1]