def regression_plots():
    """Write the Gaussian-process regression figures to ``OUTPUT_DIR/gp``.

    Saves the R^2 comparison plot twice (without and with a legend) and, for
    the 'Full' model, the ``plot_res_distribution`` figure plus one
    ``plot_res_distribution_time`` figure for each of the time points
    7.5, 30 and 120.
    """
    from src.regression_compare import plot_compare_r2, load_results
    from src.gp import GP, plot_res_distribution, plot_res_distribution_time

    gp_dir = "%s/gp" % OUTPUT_DIR
    mkdirs_safe([gp_dir])

    # plot comparison
    plot_compare_r2(gp_dir)
    plt.savefig('%s/compare_gp_r2.pdf' % gp_dir, transparent=True)

    plot_compare_r2(gp_dir, show_legend=True)
    plt.savefig('%s/compare_gp_r2_legend.pdf' % gp_dir, transparent=True)

    # NOTE(review): the returned value was never used by the original code;
    # the call is kept in case it is relied on to validate/load the result
    # files -- confirm and drop if it has no side effects.
    load_results(gp_dir)

    for name in ['Full']:
        cur = GP(name, results_path='%s/%s_results.csv' % (gp_dir, name))

        plot_res_distribution(cur, selected_genes=selected_genes)
        plt.savefig('%s/%s_predictions.pdf' % (gp_dir, name),
                    transparent=True)

        # `time_point` rather than `time` so the stdlib module name is not
        # shadowed inside this function.
        for time_point in [7.5, 30, 120]:
            plot_res_distribution_time(cur, time_point,
                                       selected_genes=selected_genes)
            plt.savefig('%s/%s_%s.pdf' % (gp_dir, name, time_point),
                        transparent=True, dpi=100)
def summary_plots():
    """Write the per-gene cross-correlation and line summary plots.

    Plots are produced for every gene in ``genes`` and, a second time with a
    '_figure' suffix and a large font, for HSP26. Output goes to
    ``OUTPUT_DIR/cc`` and ``OUTPUT_DIR/lines``.
    """
    cross_corr = pd.read_hdf(cross_corr_sense_path, 'cross_correlation')
    summary = SummaryPlotter(datastore, paper_orfs, cross_corr)

    cc_dir = '%s/cc' % OUTPUT_DIR
    lines_dir = '%s/lines' % OUTPUT_DIR
    mkdirs_safe([cc_dir, lines_dir])

    # Keyword arguments common to both write_gene_plots calls.
    shared = dict(
        cc_dir=cc_dir,
        lines_dir=lines_dir,
        show_plot=False,
        custom_lims={
            'TAD2': [(-3, 3), (-3, 3)],
            'MET31': [(-2.5, 2.5), (-8, 8)],
        },
    )

    summary.write_gene_plots(genes, **shared)
    summary.write_gene_plots(['HSP26'], suffix='_figure', large_font=True,
                             **shared)
def compute_cross_correlations(strand='sense'):
    """Cross correlate the MNase-seq data against the kernels for each ORF.

    Args:
        strand: 'antisense' selects the antisense ORFs and output paths; any
            other value uses the sense (paper) ORFs and paths.

    The per-bp cross correlation is saved as HDF to the strand's
    cross-correlation path and the summary as
    ``mnase_dir/cross_correlation_summary_<strand>.csv``.
    """
    from src.cross_correlation import \
        calculate_cross_correlation_all_chromosomes
    from src.cross_correlation_kernel import MNaseSeqDensityKernel

    use_antisense = (strand == 'antisense')
    if use_antisense:
        target_orfs = antisense_orfs
        chrom_dir = cc_antisense_chrom_dir
        hdf_path = cross_corr_antisense_path
    else:
        target_orfs = paper_orfs
        chrom_dir = cc_sense_chrom_dir
        hdf_path = cross_corr_sense_path

    mkdirs_safe([chrom_dir])

    nucleosome_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    small_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    triple_kernel = compute_triple_kernel(nucleosome_kernel)

    print_fl("Cross correlating %d ORFs..." % len(target_orfs))

    cross, summary_cross = calculate_cross_correlation_all_chromosomes(
        all_mnase_data,
        target_orfs,
        nucleosome_kernel,
        small_kernel,
        triple_kernel,
        save_chrom_dir=chrom_dir,
        timer=timer,
        log=True,
        find_antisense=use_antisense)

    cross.to_hdf(hdf_path, 'cross_correlation', mode='w', complevel=9,
                 complib='zlib')
    summary_path = '%s/cross_correlation_summary_%s.csv' % (mnase_dir, strand)
    summary_cross.to_csv(summary_path)

    print_fl("Done.")
    timer.print_time()
    print_fl()
def plot_heatmaps():
    """Render the chromatin heatmaps into ``OUTPUT_DIR/heatmaps``.

    Writes the full heatmap, the head/tail 200 and head/tail 20 subsets, the
    colorbars, and a grouped heatmap for the ``met4`` ORF groups. The
    ``show_xlabels`` / ``plot_gene_names`` / ``show_saved_plot`` attributes
    of the ``ChromatinHeatmaps`` object are toggled before each call, so the
    statement order below matters.
    """
    from config import OUTPUT_DIR
    from src import met4
    from src.chromatin_heatmaps import ChromatinHeatmaps

    heatmaps = ChromatinHeatmaps(datastore)

    write_dir = '%s/heatmaps' % OUTPUT_DIR
    mkdirs_safe([write_dir])

    # Full heatmap.
    heatmaps.show_xlabels = True
    heatmaps.show_saved_plot = False
    heatmaps.plot_heatmap(write_path=("%s/all.png" % write_dir),
                          aspect_scale=25., fig_height=20.,
                          lines=[200, -200])

    small_heatmap_scale = 15.

    # Head / tail 200 rows (the file names say "100"; kept unchanged so
    # downstream consumers of these paths keep working).
    heatmaps.show_xlabels = False
    heatmaps.plot_heatmap(head=200, write_path=("%s/t100.png" % write_dir),
                          aspect_scale=small_heatmap_scale, lines=[-20])

    heatmaps.show_xlabels = True
    heatmaps.plot_heatmap(tail=200, write_path=("%s/b100.png" % write_dir),
                          aspect_scale=small_heatmap_scale, lines=[20])

    # Head / tail 20 rows, with gene names.
    heatmaps.show_xlabels = False
    heatmaps.plot_gene_names = True
    heatmaps.plot_heatmap(head=20, write_path=("%s/t20.png" % write_dir),
                          aspect_scale=small_heatmap_scale)

    heatmaps.show_xlabels = True
    heatmaps.plot_heatmap(tail=20, write_path=("%s/b20.png" % write_dir),
                          aspect_scale=small_heatmap_scale)
    heatmaps.plot_gene_names = False

    heatmaps.plot_colorbars(write_path='%s/cbar.png' % write_dir)

    # Grouped heatmap for the met4 ORF groups.
    heatmaps.show_saved_plot = True
    heatmaps.plot_gene_names = True
    heatmaps.plot_heatmap(orf_groups=met4.orf_groups(),
                          group_names=met4.groups(),
                          group_colors=met4.group_colors(),
                          write_path=("%s/sulfur.pdf" % write_dir),
                          fig_height=11,
                          aspect_scale=5000.,
                          highlight_max=[],
                          y_padding=1)
    heatmaps.plot_gene_names = False
def main():
    """Entry point for step 6: generate the reviewer materials.

    Prints the banner and preamble, creates ``save_dir``, applies the global
    plot settings, then runs the shift edge analysis, the additional scatter
    plots, the DANPOS comparison and the OD curve plot, in that order.
    """
    border = "*******************************"
    for banner_line in (border, "* 6 Reviewer Materials *", border):
        print_fl(banner_line)

    print_preamble()
    mkdirs_safe([save_dir])
    plot_utils.apply_global_settings()

    # plots for shift edge analysis
    shift_edge_analysis.main()

    # additional scatter plots, then danpos, then the OD curve
    scatters()
    xrate_vs_TPM()
    danpos()
    plot_OD_curve()
def tf_plots():
    """Write the transcription-factor (small peak) figures.

    Produces the 0-60 scatter, the three TF summary plots, the AFT1/AFT2
    scatter with LEE1/SER33/ENB1 labeled, and typhoon plots for those three
    genes. Figures go under ``OUTPUT_DIR/tf_analysis``. Initializes the
    module-level ``plotter`` if it has not been created yet.
    """
    global plotter

    from src.small_peak_calling import SmallPeakCalling
    from src.small_peak_calling import plot_tf_scatter, plot_tf_summary

    small_peaks = SmallPeakCalling()
    small_peaks.load_data()

    save_dir = '%s/tf_analysis' % OUTPUT_DIR
    # Use the same directory helper as the rest of this function/file.
    mkdirs_safe([save_dir])

    plot_tf_scatter(small_peaks, t1=60)
    plt.savefig('%s/small_peaks_0_60.pdf' % save_dir, transparent=True)

    plot_tf_summary(small_peaks, tail=small_peaks.view_low)
    plt.savefig('%s/tf_means_bottom.pdf' % save_dir, transparent=True)

    plot_tf_summary(small_peaks, head=small_peaks.view_high)
    plt.savefig('%s/tf_means_top.pdf' % save_dir, transparent=True)

    plot_tf_summary(small_peaks)
    plt.savefig('%s/tf_means.pdf' % save_dir, transparent=True)

    # plot Aft1/Aft2 peaks
    labeled_peaks = small_peaks.all_motifs.copy()
    labeled_peaks = labeled_peaks[labeled_peaks.tf.isin(
        ['AFT1', 'AFT2'])][['orf', 'peak']].drop_duplicates()
    # NOTE(review): 'orf_name' is presumably the index name of paper_orfs,
    # since only the 'name' column is selected -- confirm.
    labeled_peaks = labeled_peaks.merge(paper_orfs[['name']],
                                        left_on='orf', right_on='orf_name')
    labeled_peaks = labeled_peaks.set_index('peak')
    selected_labeled_peaks = labeled_peaks[labeled_peaks['name'].isin(
        ['LEE1', 'SER33', 'ENB1'])]

    plot_tf_scatter(small_peaks, tf_names=['AFT1', 'AFT2'],
                    labeled_peaks=selected_labeled_peaks, t1=60.0)
    plt.savefig("%s/aft1_aft2_scatter.pdf" % save_dir, transparent=True)

    # typhoon dir
    save_typhoon_dir = '%s/tf_analysis/typhoon/' % OUTPUT_DIR
    save_dir_all_motifs = '%s/tf_analysis/typhoon_all_motifs/' % OUTPUT_DIR
    mkdirs_safe([save_typhoon_dir, save_dir_all_motifs])

    if plotter is None:
        plotter = get_plotter()

    aft_genes = ['SER33', 'LEE1', 'ENB1']
    plotter.plot_genes(aft_genes, save_typhoon_dir, save_dir_all_motifs,
                       times=[0.0, 30.0, 60.0], titlesize=34)
def danpos():
    """Run DANPOS on the time-0 MNase-seq data and fit kernels from its calls.

    Steps:
      1. Write the time-0 MNase-seq reads as a BED file in
         ``OUTPUT_DIR/danpos/``.
      2. Run ``scripts/6_reviewer_mats/run_danpos.sh`` on that file.
      3. Read the pooled DANPOS positions, save a histogram of ``smt_value``
         (values below 10000), and keep the 2500 highest-``smt_value`` calls.
      4. Compute a nucleosome kernel and a triple kernel from those calls,
         save the kernel JSON and both kernel plots.
    """
    import os

    from src.dpos_bed import create_bed_for_dpos
    from src.kernel_fitter import compute_nuc_kernel, compute_triple_kernel
    from src.utils import run_cmd

    working_dir = os.getcwd()
    danpos_output = '%s/danpos/' % (OUTPUT_DIR)
    mkdirs_safe([danpos_output])
    danpos_path = "%s/danpos-2.2.2/danpos.py" % working_dir

    # create DANPOS Bed file
    mnase = pd.read_hdf(mnase_seq_path, 'mnase_data')
    mnase = mnase[mnase.time == 0]

    save_file = 'mnase_0.bed'
    save_path = '%s/%s' % (danpos_output, save_file)
    create_bed_for_dpos(mnase, save_path)
    print_fl("Wrote %s" % save_path)

    bash_command = "scripts/6_reviewer_mats/run_danpos.sh %s %s %s" % \
        (save_file, OUTPUT_DIR, danpos_path)
    # NOTE(review): the command's output/error are not inspected; a failed
    # DANPOS run surfaces only as a missing file in read_csv below.
    run_cmd(bash_command, stdout_file=None)

    danpos_calls_path = '%s/result/pooled/mnase_0.smooth.positions.xls' % \
        (danpos_output)
    danpos_positions = pd.read_csv(danpos_calls_path, sep='\t')

    plt.hist(danpos_positions[danpos_positions.smt_value < 10000].smt_value,
             bins=100)
    plt.savefig("%s/danpos_smt_pos.png" % danpos_output)

    danpos_positions = danpos_positions.sort_values('smt_value',
                                                    ascending=False)
    top_danpos = danpos_positions.head(2500)
    top_danpos = top_danpos.rename(columns={
        'chr': 'chromosome',
        'smt_pos': 'position'
    })

    nuc_kernel = compute_nuc_kernel(mnase, top_danpos)
    nuc_kernel.save_kernel("%s/danpos_kernel.json" % danpos_output)

    # NOTE: the kernel figures are written to the module-level `save_dir`,
    # not to `danpos_output`.
    nuc_kernel.plot_kernel(kernel_type='nucleosome')
    plt.savefig('%s/danpos_nuc_kernel.pdf' % (save_dir), transparent=True)

    triple_kernel = compute_triple_kernel(nuc_kernel)
    triple_kernel.plot_kernel(kernel_type='triple')
    plt.savefig('%s/danpos_triple_kernel.pdf' % (save_dir), transparent=True)
def save_data(self):
    """Write the peak and motif tables as CSV into ``OUTPUT_DIR/tf_analysis``."""
    out_dir = '%s/tf_analysis' % OUTPUT_DIR
    mkdirs_safe([out_dir])
    print_fl("Saving %s" % out_dir)

    # (attribute on self, output file name), written in this order
    tables = (
        ('all_peaks', 'all_peaks.csv'),
        ('linked_peaks_normalized', 'linked_peaks_norm.csv'),
        ('linked_peaks', 'linked_peaks.csv'),
        ('prom_peaks', 'prom_peaks.csv'),
        ('all_motifs', 'all_motifs.csv'),
    )
    for attribute, filename in tables:
        getattr(self, attribute).to_csv('%s/%s' % (out_dir, filename))
def locus_plots():
    """Merge typhoon, cc, and line plots into a single pdf per gene.

    One ``locus_<gene>.pdf`` is written to ``OUTPUT_DIR/locus_plots`` for
    every entry of ``genes``.
    """
    from src.pdf_utils import merge_locus_pdf

    out_dir = '%s/locus_plots' % OUTPUT_DIR
    mkdirs_safe([out_dir])

    for gene_name in genes:
        merge_locus_pdf(OUTPUT_DIR, gene_name,
                        '%s/locus_%s.pdf' % (out_dir, gene_name))
def __init__(self, name, parent_watch_dir, num_wait, sleep_time=600,
             timer=None):
    """Store the watcher settings and create its watch directory.

    Args:
        name: watcher name; also the sub-directory name under
            ``parent_watch_dir``.
        parent_watch_dir: directory under which the watch directory lives.
        num_wait: stored as ``self.num_wait``.
        sleep_time: stored as ``self.sleep_time`` (presumably seconds --
            TODO confirm against where it is consumed).
        timer: optional timer object, stored as ``self.timer``.
    """
    watch_dir = '/'.join((parent_watch_dir, name))

    self.name, self.watch_dir = name, watch_dir
    self.num_wait, self.sleep_time = num_wait, sleep_time
    self.timer = timer

    # create directory to watch for slurm jobs to finish
    mkdirs_safe([watch_dir])
def main():
    """
    Initial BAM data download and conversion to dataframes

    1. Download RNA-seq and MNase-seq BAM files to disk
    2. Read RNA-seq BAM, convert to dataframe and save
    3. Compute RNA-seq pileup and save
    4. Read MNase-seq BAM duplicates
    5. Merge, sample, and save

    Inputs:
        - output directory path
        - RNA-seq BAM files path
        - MNase-seq BAM files path

    Output:
        - RNA-seq data frame of reads
        - RNA-seq pileup data frame
        - MNase-seq merged data frame
    """
    print_fl("*******************************")
    print_fl("* 1 Initialization *")
    print_fl("*******************************")

    # ------ Setup -------------

    print_preamble()

    # Make directories. rna_bam_rep2_dir is included because init_rna_seq()
    # reads replicate 2 from it; it was previously missing from this list.
    mkdirs_safe([
        rna_bam_rep1_dir,
        rna_bam_rep2_dir,
        mnase_bam_rep1_dir,
        mnase_bam_rep2_dir,
        rna_dir,
        mnase_dir,
    ])

    # ------- Download BAM files to disk ------------

    print_fl("\n------- Downloading dataset ----------\n")
    download_bam()

    print_fl("\n------- RNA-seq ----------\n")
    init_rna_seq()

    print_fl("\n------- MNase-seq ----------\n")
    init_mnase_seq()

    print_fl("Data initialization done. Time elapsed: %s" % timer.get_time())
def call_p123_nucleosomes(strand='sense'):
    """Call the +1, +2 and +3 nucleosomes for each ORF and save the results.

    Args:
        strand: 'antisense' selects the antisense ORFs, directories and
            output paths; any other value uses the sense ones.

    Writes the linkages and called ORFs as CSV under ``mnase_dir`` and one
    position/time table per nucleosome (+1, +2, +3) to the strand's
    p1/p2/p3 paths.
    """
    from src.nucleosome_linkages import call_all_nucleosome_p123

    is_antisense = (strand == 'antisense')

    # relevant cross correlation directory and output locations
    if is_antisense:
        orfs = antisense_orfs
        nuc_chrom_dir = anti_nuc_chrom_dir
        cross_corr_dir = cc_antisense_chrom_dir
        out_paths = (p1_antisense_path, p2_antisense_path, p3_antisense_path)
    else:
        orfs = paper_orfs
        nuc_chrom_dir = sense_nuc_chrom_dir
        cross_corr_dir = cc_sense_chrom_dir
        out_paths = (p1_sense_path, p2_sense_path, p3_sense_path)

    mkdirs_safe([nuc_chrom_dir])

    print_fl("Calling +1, +2, and +3 nucleosomes...", end='\n')
    linkages, called_orfs = call_all_nucleosome_p123(
        orfs, is_antisense, cross_corr_dir, nuc_chrom_dir, timer)

    linkages.to_csv(
        '%s/called_orf_nucleosomes_%s.csv' % (mnase_dir, strand))
    called_orfs.to_csv(
        '%s/called_orf_p123_nucleosomes_%s.csv' % (mnase_dir, strand))

    # Build all three tables before writing any of them.
    tables = [
        nucleosome_linkages.convert_to_pos_time_df(called_orfs, linkages,
                                                   label)
        for label in ('+1', '+2', '+3')
    ]
    for table, out_path in zip(tables, out_paths):
        table.to_csv(out_path)

    print_fl('Done.')
    timer.print_time()
    print_fl()
def compute_organization_measures(strand='sense'):
    """Compute the cross-correlation summary measure (entropy) per ORF.

    Args:
        strand: 'antisense' uses the antisense ORFs; any other value uses
            the sense (paper) ORFs. Also selects the input and output file
            names.

    Reads ``mnase_dir/cross_correlation_<strand>.h5.z`` and writes the
    measures, rounded to 3 decimals, to
    ``mnase_dir/orf_<strand>_entropies.csv``.
    """
    from src.entropy import calculate_cc_summary_measure

    target_orfs = antisense_orfs if strand == 'antisense' else paper_orfs

    mkdirs_safe([sense_entropy_dir, anti_entropy_dir])

    print_fl("Loading cross correlation")
    cross_corr_file = '%s/cross_correlation_%s.h5.z' % (mnase_dir, strand)
    cross = pd.read_hdf(cross_corr_file, 'cross_correlation')

    print_fl("Calculating entropy %d ORFS..." % len(target_orfs))
    measures = calculate_cc_summary_measure(target_orfs, cross, strand, timer)
    rounded = measures.round(3)
    rounded.to_csv('%s/orf_%s_entropies.csv' % (mnase_dir, strand))

    timer.print_time()
    print_fl()
def typhoon_plots():
    """Write the typhoon plots for ``genes``, the example plots, and HSP26.

    (Re)creates the module-level ``plotter``. Typhoon plots go to
    ``OUTPUT_DIR/typhoon`` and ``OUTPUT_DIR/typhoon_all_motifs``; the
    example plots go to ``misc_figures_dir``.
    """
    global plotter
    plotter = get_plotter()

    typhoon_dir = '%s/typhoon' % OUTPUT_DIR
    all_motifs_dir = '%s/typhoon_all_motifs' % OUTPUT_DIR
    mkdirs_safe([typhoon_dir, all_motifs_dir, misc_figures_dir])

    # Genes that get a custom figure width and padding.
    custom_layout_genes = ('MCD4', 'APJ1')
    figwidths = dict((gene, 12) for gene in custom_layout_genes)
    paddings = dict((gene, (1000, 2000)) for gene in custom_layout_genes)

    print_fl("Plotting typhoons...", end='')
    plotter.plot_genes(genes, typhoon_dir, all_motifs_dir,
                       figwidths=figwidths, paddings=paddings)
    print_fl("Done.")
    timer.print_time()

    # example plots
    print_fl("Plotting examples...", end='')
    draw_example_mnase_seq(plotter, misc_figures_dir)
    draw_example_rna_seq(plotter, misc_figures_dir)
    # plot_example_cross(plotter, misc_figures_dir)
    print_fl("Done.")
    timer.print_time()

    # Hsp26 plot for figure 1
    hsp26_times = [0.0, 30.0, 60.0, 120.0]
    plotter.plot_genes(['HSP26'], typhoon_dir, None, times=hsp26_times,
                       prefix='fig_')
def shift_plots():
    """Write the nucleosome-shift figures to ``OUTPUT_DIR/shift``.

    Produces the +1/+2/+3 plot for RPS7A and two distribution plots at
    120 min: +1 shift vs. log2 fold-change transcription rate, and +1 shift
    vs. +2 shift. Initializes the module-level ``plotter`` if needed.
    """
    global plotter

    from src.chromatin_summary_plots import plot_distribution
    from src.nucleosome_calling import plot_p123
    from src.summary_plotter import SummaryPlotter

    orf_cc = pd.read_hdf(cross_corr_sense_path, 'cross_correlation')
    all_orfs = all_orfs_TSS_PAS()
    sum_plotter = SummaryPlotter(datastore, all_orfs, orf_cc)

    if plotter is None:
        plotter = get_plotter()

    save_dir = '%s/shift' % OUTPUT_DIR
    mkdirs_safe([save_dir])

    for gene_name in ['RPS7A']:
        plot_p123(gene_name, orf_cc, plotter, sum_plotter, save_dir)

    # +1 shift vs transcription rate
    x = datastore.p1_shift[120]
    y = datastore.transcript_rate_logfold.loc[x.index][120.0]
    plot_distribution(x, y,
                      '$\\Delta$ +1 nucleosome shift',
                      # '\\log' rather than '\log': same text, but no
                      # invalid escape sequence in the literal.
                      '$\\log_2$ fold-change transcription rate',
                      title='+1 shift vs transcription, 0-120 min',
                      xlim=(-40, 40), ylim=(-8, 8),
                      xstep=10, ystep=2,
                      pearson=True, s=10)
    plt.savefig('%s/shift_+1_xrate.pdf' % save_dir, transparent=True)

    # +1 shift vs +2 shift
    x = datastore.p1_shift[120]
    y = datastore.p2_shift[120]
    plot_distribution(x, y,
                      '$\\Delta$ +1 nucleosome shift',
                      '$\\Delta$ +2 nucleosome shift',
                      title='+1, +2 nucleosome shift\n0-120 min',
                      xlim=(-40, 40), ylim=(-40, 40),
                      xstep=10, ystep=10,
                      pearson=False, s=10)
    plt.savefig('%s/shift_p12.pdf' % save_dir, transparent=True)
def determine_transcript_boundaries():
    """Compute antisense and sense transcript boundaries from the pileup.

    Reads the RNA-seq pileup, loads the Park boundaries, and runs
    ``compute_boundaries`` once with ``find_antisense=True`` and once with
    ``find_antisense=False``. For each strand the full result is written to
    ``rna_dir/<strand>_boundaries_computed_all.csv``. The antisense result
    restricted to ``paper_orfs`` is written to ``antisense_orfs_path``; the
    sense result restricted to ``paper_orfs`` is written to
    ``rna_dir/sense_boundaries_computed.csv``.
    """
    from src.transcript_boundaries import compute_boundaries
    from src.transcript_boundaries import load_park_boundaries

    print_fl("Reading RNA-seq pileup...", end='')
    rna_seq_pileup = pd.read_hdf(pileup_path, 'pileup')
    print_fl("Done.")
    timer.print_time()
    print_fl()

    park_boundaries = load_park_boundaries()

    mkdirs_safe([anti_chrom_dir, sense_chrom_dir])

    # ------------- Antisense transcript boundaries ------------------

    print_fl("Determining antisense boundaries...", end='')
    antisense_boundaries = compute_boundaries(park_boundaries,
                                              rna_seq_pileup,
                                              save_dir=anti_chrom_dir,
                                              pileup_path=pileup_path,
                                              find_antisense=True,
                                              log=True,
                                              timer=timer)

    # all to compare with Park
    path = '%s/antisense_boundaries_computed_all.csv' % rna_dir
    antisense_boundaries.to_csv(path)

    # antisense paper data set
    antisense_boundaries = antisense_boundaries[[
        'TSS', 'strand', 'start', 'stop'
    ]].join(paper_orfs[['name', 'chr', 'orf_class']], how='inner')
    antisense_boundaries.to_csv(antisense_orfs_path)

    print_fl("Done.")
    # Report the path that was actually written. The original message
    # printed a separately built path that was never used for the write.
    print_fl("Wrote to %s" % antisense_orfs_path)
    timer.print_time()
    print_fl()

    # ------------- Sense transcript boundaries ------------------

    # compute sense boundaries
    # TODO: currently unused, only for check against Park boundaries

    print_fl("Determining sense boundaries...", end='')
    sense_boundaries = compute_boundaries(park_boundaries,
                                          rna_seq_pileup,
                                          save_dir=sense_chrom_dir,
                                          pileup_path=pileup_path,
                                          find_antisense=False,
                                          log=True,
                                          timer=timer)

    # all to compare with Park
    path = '%s/sense_boundaries_computed_all.csv' % rna_dir
    sense_boundaries.to_csv(path)

    # paper data set: the empty column selection makes the inner join act
    # purely as a filter on the index of paper_orfs.
    path = '%s/sense_boundaries_computed.csv' % rna_dir
    sense_boundaries.join(paper_orfs[[]], how='inner').to_csv(path)

    print_fl("Done.")
    print_fl("Wrote to %s" % path)
    timer.print_time()
    print_fl()
def main():
    """
    Computation of chromatin metrics against sense and antisense strand for
    analysis.

    1. * For the sense strand for each ORF
    2. Compute occupancies
    3. Compute cross correlation and save these per chromosome to disk
    4. Compute cross correlation summaries
    5. Call nucleosomes in cross correlation window
    6. Call +1, +2, and +3 nucleosomes relative to TSS
    7. * Repeat 2-6 for identified for antisense TSSs

    Inputs:
        - MNase-seq data
        - RNA-seq data
        - Antisense transcript boundaries

    Output:
        - MNase-seq occupancy summaries for each ORF
        - Per bp cross correlation scores for each ORF
        - Cross correlation summary scores for each ORF
        - Called nucleosomes local to each ORF
        - Called +1, +2, +3 nucleosomes to each ORF

    * for Sense and Antisense strands
    """
    banner_edge = "***********************"
    for banner_line in (banner_edge, "* 3 Metrics *", banner_edge):
        print_fl(banner_line)

    print_preamble()

    # paths to save cross correlations per chromosome
    mkdirs_safe([cc_sense_chrom_dir, cc_antisense_chrom_dir])

    if USE_SLURM:
        mkdirs_safe([WATCH_TMP_DIR])

    # (section header, step) pairs, run strictly in this order
    pipeline = (
        ("\n------- Read inputs ----------\n",
         lambda: read_input_data()),
        ("\n------- Calculate occupancies (Sense) ----------\n",
         lambda: compute_occupancies()),
        ("\n------- Calculate cross correlation (Sense) ----------\n",
         lambda: compute_cross_correlations()),
        ("\n------- Calculate nucleosome shift (Sense) ----------\n",
         lambda: call_p123_nucleosomes()),
        ("\n------- Calculate entropy (Sense) ----------\n",
         lambda: compute_organization_measures(strand='sense')),
        ("\n------- Calculate occupancies (Antisense) ----------\n",
         lambda: compute_occupancies(strand='antisense')),
        ("\n------- Calculate cross correlation (Antisense) ----------\n",
         lambda: compute_cross_correlations(strand='antisense')),
        ("\n------- Calculate nucleosome shift (Antisense) ----------\n",
         lambda: call_p123_nucleosomes(strand='antisense')),
        ("\n------- Calculate entropy (Antisense) ----------\n",
         lambda: compute_organization_measures(strand='antisense')),
        ("\n--------- Generate data for supplemental ------------\n",
         lambda: create_suppl_data()),
    )
    for header, run_step in pipeline:
        print_fl(header)
        run_step()
def init_rna_seq():
    """Read both RNA-seq replicates, sample, merge, save, and build a pileup.

    Steps:
      1. Read the replicate 1 and replicate 2 BAM files into dataframes.
      2. Report the read depth per time point for each replicate.
      3. Sample each replicate down to its own minimum per-time-point depth.
      4. Merge the replicates (unsampled and sampled) and save both to
         ``rna_dir`` as compressed HDF.
      5. Compute the RNA-seq pileup from the sampled merge and save it to
         ``pileup_path``.
    """
    # Local import so pd.concat below does not depend on a file-level alias.
    import pandas as pd

    from src.read_bam import read_rna_seq_set
    from src.rna_seq_pileup import calculate_rna_seq_pileup
    from src.transcription import sample_rna

    rna_seq_rep1_filenames = [
        "DM538_RNA_rep1_0_min.bam",
        "DM539_RNA_rep1_7.5_min.bam",
        "DM540_RNA_rep1_15_min.bam",
        "DM541_RNA_rep1_30_min.bam",
        "DM542_RNA_rep1_60_min.bam",
        "DM543_RNA_rep1_120_min.bam",
    ]
    rna_seq_rep2_filenames = [
        "DM1450_RNA_rep2_0_min.bam",
        "DM1451_RNA_rep2_7.5_min.bam",
        "DM1452_RNA_rep2_15_min.bam",
        "DM1453_RNA_rep2_30_min.bam",
        "DM1454_RNA_rep2_60_min.bam",
        "DM1455_RNA_rep2_120_min.bam",
    ]

    read_columns = ['chr', 'start', 'stop', 'length', 'strand', 'time',
                    'source']
    sort_columns = ['source', 'time', 'chr', 'start']

    def finish_step():
        # Close a progress message that was printed with end=''.
        print_fl("Done.")
        timer.print_time()
        print_fl()

    def depth_by_time(reads):
        # Number of reads per time point, in a single 'count' column.
        return reads[['chr', 'time']].groupby('time').count()\
            .rename(columns={'chr': 'count'})

    def merge_replicates(rep1, rep2):
        # pd.concat replaces DataFrame.append, which was removed in
        # pandas 2.0; the row order and index are the same.
        merged = pd.concat([rep1, rep2])
        return merged[read_columns].sort_values(sort_columns)

    # Read replicate 1
    print_fl("Reading Replicate 1 RNA-seq BAM...", end='')
    rna_seq_rep1 = read_rna_seq_set(rna_bam_rep1_dir, rna_seq_rep1_filenames,
                                    source='dm538_dm543', debug=DEBUG)
    finish_step()

    # Read replicate 2
    print_fl("Reading Replicate 2 RNA-seq BAM...", end='')
    rna_seq_rep2 = read_rna_seq_set(rna_bam_rep2_dir, rna_seq_rep2_filenames,
                                    source='dm1450_dm1455', debug=DEBUG)
    finish_step()

    # depth of each dataset
    rep1_depth = depth_by_time(rna_seq_rep1)
    rep2_depth = depth_by_time(rna_seq_rep2)

    print_fl("Rep1 read depth:\n" + str(rep1_depth), end='\n\n')
    print_fl("Rep2 read depth:\n" + str(rep2_depth), end='\n\n')

    sample_rep1_depth = rep1_depth['count'].min()
    sample_rep2_depth = rep2_depth['count'].min()

    print_fl("Sampling Rep1 to %d for each time point..." %
             sample_rep1_depth, end='')
    rep1_sampled = sample_rna(rna_seq_rep1, sample_rep1_depth)
    finish_step()

    print_fl("Sampling Rep2 to %d for each time point..." %
             sample_rep2_depth, end='')
    rep2_sampled = sample_rna(rna_seq_rep2, sample_rep2_depth)
    finish_step()

    # Merge replicates
    print_fl("Merging RNA-seq files...", end='')
    merged_rna = merge_replicates(rna_seq_rep1, rna_seq_rep2)
    finish_step()

    # Merge sampled replicates
    print_fl("Merging RNA-seq files...", end='')
    merged_rna_sampled = merge_replicates(rep1_sampled, rep2_sampled)
    finish_step()

    # Save all RNA-seq data to disk
    save_path = '%s/rnase_seq_all.h5.z' % rna_dir
    print_fl("Saving merged RNase-seq to %s..." % save_path, end='')
    merged_rna.to_hdf(save_path, key='rna_seq_data', mode='w', complevel=9,
                      complib='zlib')
    # Previously this message was never terminated, so the next one was
    # printed on the same line.
    finish_step()

    # Save merged sampled data to disk
    save_path = '%s/rnase_seq_merged_sampled.h5.z' % rna_dir
    print_fl("Saving merged RNase-seq to %s..." % save_path, end='')
    merged_rna_sampled.to_hdf(save_path, key='rna_seq_data', mode='w',
                              complevel=9, complib='zlib')
    finish_step()

    # convert to pileup dataframe
    mkdirs_safe([pileup_chrom_dir])

    print_fl("Calculating RNA-seq pileup...", end='')
    pileup = calculate_rna_seq_pileup(merged_rna_sampled, timer)
    finish_step()

    save_path = pileup_path
    print_fl("Saving RNA-seq pileup to %s..." % save_path, end='')
    pileup.to_hdf(save_path, key='pileup', mode='w', complevel=9,
                  complib='zlib')
    finish_step()
def misc_plots():
    """Write the miscellaneous figures: kernels, met4, scatters, fragments.

    Kernel plots go to ``OUTPUT_DIR/kernels``, met4 plots to
    ``OUTPUT_DIR/met4``, scatter plots to ``OUTPUT_DIR/scatters`` and the
    remaining figures to ``misc_figures_dir``. Initializes the module-level
    ``plotter`` if needed.
    """
    global plotter

    from src.met4 import plot_timecourse
    from src.chromatin_summary_plots import (
        plot_combined_vs_xrate, plot_sul_prom_disorg, plot_occ_vs_xrate,
        plot_disorg_vs_xrate, plot_diosorg_vs_occ, plot_frag_len_dist)
    from src.cross_correlation_kernel import MNaseSeqDensityKernel

    scatter_dpi = 200

    def save_current(directory, filename, **savefig_kwargs):
        # Every figure in this function is saved with a transparent
        # background.
        plt.savefig('%s/%s' % (directory, filename), transparent=True,
                    **savefig_kwargs)

    met4_dir = "%s/met4" % OUTPUT_DIR
    scatters_dir = "%s/scatters" % OUTPUT_DIR
    kernels_dir = "%s/kernels" % OUTPUT_DIR
    mkdirs_safe([met4_dir, scatters_dir, kernels_dir])

    # kernels
    nuc_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    nuc_kernel.plot_kernel(kernel_type='nucleosome')
    save_current(kernels_dir, 'nuc_kernel.pdf')

    sm_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    sm_kernel.plot_kernel(kernel_type='small')
    save_current(kernels_dir, 'sm_kernel.pdf')

    from src.kernel_fitter import compute_triple_kernel
    triple_kernel = compute_triple_kernel(nuc_kernel)
    triple_kernel.plot_kernel(kernel_type='triple')
    save_current(kernels_dir, 'triple_kernel.pdf')

    from src.nucleosome_calling import plot_nuc_calls_cc
    plot_nuc_calls_cc()
    save_current(misc_figures_dir, 'nuc_cross_cor_0_min.pdf')

    # met4 plots
    plot_timecourse(datastore)
    save_current(met4_dir, 'met4_timecourse.pdf')

    plot_sul_prom_disorg(datastore)
    save_current(met4_dir, 'met4_scatter.pdf', dpi=scatter_dpi)

    # scatter plots: (plot function, output file name)
    scatter_jobs = (
        (plot_combined_vs_xrate, 'combined_vs_xrate.pdf'),
        (plot_occ_vs_xrate, 'small_vs_xrate.pdf'),
        (plot_disorg_vs_xrate, 'disorg_vs_xrate.pdf'),
        (plot_diosorg_vs_occ, 'disorg_vs_small.pdf'),
    )
    for draw_scatter, filename in scatter_jobs:
        draw_scatter(datastore, selected_genes)
        save_current(scatters_dir, filename, dpi=scatter_dpi)

    plot_ORFs_len(misc_figures_dir)
    plot_coverage(misc_figures_dir)

    if plotter is None:
        plotter = get_plotter()

    # plot sampled mnase data
    plot_frag_len_dist(plotter.all_mnase_data)
    save_current(misc_figures_dir, 'frag_length_distribution.pdf')

    print_fl("Load all MNase-seq data for fragment length distributions")
    full_mnase = pd.read_hdf('%s/mnase_seq_merged_all.h5.z' % mnase_dir,
                             'mnase_data')
    repl1_mnase = full_mnase[full_mnase['source'] == 'dm498_503']
    repl2_mnase = full_mnase[full_mnase['source'] == 'dm504_509']
    print_fl("Done.")

    plot_frag_len_dist(repl1_mnase, "Replicate 1", normalize=True)
    save_current(misc_figures_dir, 'frag_length_distribution_repl1.pdf')

    plot_frag_len_dist(repl2_mnase, "Replicate 2", normalize=True)
    save_current(misc_figures_dir, 'frag_length_distribution_repl2.pdf')
def antisense_plots():
    """Write the antisense-transcript figures to ``OUTPUT_DIR/antisense``.

    Covers the sense-vs-antisense distribution and counts, the antisense
    log fold-change distribution, antisense lengths, the MET31 calling
    example, and two chromatin-vs-transcription distributions computed from
    an antisense ``ChromatinDataStore``.
    """
    from src.antisense_analysis import plot_antisense_vs_sense
    from src.antisense_analysis import plot_bar_counts, plot_antisense_dist

    out_dir = '%s/antisense' % OUTPUT_DIR
    mkdirs_safe([out_dir])

    def save_as(filename, **savefig_kwargs):
        plt.savefig('%s/%s' % (out_dir, filename), **savefig_kwargs)

    # The TPM table is loaded but not used by any plot below.
    antisense_TPM = read_orfs_data('%s/antisense_TPM.csv' % rna_dir)
    antisense_logfold = read_orfs_data(
        '%s/antisense_TPM_log2fold.csv' % rna_dir)
    sense_logfold = datastore.transcript_rate_logfold

    plot_antisense_vs_sense(
        antisense_logfold, sense_logfold, 120.0,
        highlight=['MET31', 'CKB1', 'RPS7A', 'YBR241C', 'UTR2'])
    save_as('sense_antisense_distr.pdf', transparent=True, dpi=100)

    plot_bar_counts(antisense_logfold, datastore.transcript_rate_logfold)
    save_as('sense_antisense_counts.pdf')

    plot_antisense_dist(antisense_logfold)
    save_as('antisense_logfc_dist.pdf')

    from src.antisense_analysis import plot_antisense_lengths
    from src.antisense_analysis import plot_antisense_calling

    rna_seq_pileup = pd.read_hdf('%s/rna_seq_pileup.h5.z' % rna_dir,
                                 'pileup')
    # The boundaries table is loaded but not used by any plot below.
    antisense_boundaries = read_orfs_data(
        '%s/antisense_boundaries_computed.csv' % rna_dir)

    plot_antisense_lengths()
    save_as('antisense_lengths_dist.pdf')

    plot_antisense_calling('MET31', rna_seq_pileup)
    save_as('antisense_met31_calling.pdf')

    from src.chromatin_summary_plots import plot_distribution

    def draw_distribution(x, y, xlabel, ylabel, title):
        # Axis limits and steps shared by both antisense distributions; a
        # fresh highlight list is passed on every call.
        return plot_distribution(
            x, y, xlabel, ylabel,
            highlight=[],
            title=title,
            xlim=(-1.5, 1.5), ylim=(-4, 4),
            xstep=0.5, ystep=1)

    anti_datastore = ChromatinDataStore(is_antisense=True)

    occupancy = anti_datastore.promoter_sm_occupancy_delta.mean(axis=1)
    transcription = anti_datastore.antisense_TPM_logfold.mean(axis=1)\
        .loc[occupancy.index]
    draw_distribution(
        occupancy, transcription,
        '$\\Delta$ Antisense promoter occupancy',
        'Log$_2$ fold-change antisense transcript',
        'Promoter occupancy vs transcription (Antisense)')
    save_as('antisense_chrom_dist_prom_vs_xrate.pdf')

    disorganization = anti_datastore.gene_body_disorganization_delta\
        .mean(axis=1).dropna()
    transcription = anti_datastore.antisense_TPM_logfold\
        .loc[disorganization.index].mean(axis=1).loc[disorganization.index]
    draw_distribution(
        disorganization, transcription,
        '$\\Delta$ antisense nucleosome disorganization',
        'Log$_2$ fold-change antisense transcripts',
        'Nuc. disorganization vs transcription (Antisense)')
    save_as('antisense_chrom_dist_disorg_vs_xrate.pdf')