def plot_example_cross(plotter, save_dir):
    """Plot an example typhoon plot for APJ1 with its cross correlation overlaid.

    Draws the time-0 MNase data in a 1 kb window centred on the APJ1 TSS,
    overlays the 'nucleosomal' (upwards) and 'small' (mirrored downwards)
    cross-correlation scores on a twin y-axis, and writes the figure to
    ``<save_dir>/example_cross_correlation.pdf``.

    Args:
        plotter: object providing ``orfs``, ``all_mnase_data``,
            ``set_span_chrom`` and ``plot_typhoon_time``.
        save_dir: directory the PDF is written into.
    """
    from src.chromatin import filter_mnase
    from src.plot_utils import apply_global_settings
    from config import cross_corr_sense_path
    from src.utils import get_orf

    orf_cc = pd.read_hdf(cross_corr_sense_path, 'cross_correlation')
    orfs = plotter.orfs
    gene = get_orf('APJ1', orfs)

    # 500 bp on either side of the TSS.
    span = (gene.TSS - 500, gene.TSS + 500)
    plotter.set_span_chrom(span, gene.chr)

    # Cross-correlation scores of this gene at time 0.0 for each kernel.
    cc_nuc = orf_cc.loc['nucleosomal'].loc[gene.name].loc[0.0]
    cc_small = orf_cc.loc['small'].loc[gene.name].loc[0.0]

    data = filter_mnase(plotter.all_mnase_data, span[0], span[1],
                        chrom=gene.chr, time=0)

    fig, (ax, leg_ax) = plt.subplots(2, 1, figsize=(5, 6))
    fig.tight_layout(rect=[0.1, 0.1, 0.92, 0.945])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.plot_typhoon_time(ax, data, 0, scale_z=True)
    ax.set_xlim(*span)
    ax.set_xticks(np.arange(span[0], span[1], 500))
    ax.set_xticks(np.arange(span[0], span[1], 100), minor=True)
    ax.set_xlabel("Position (bp)", fontsize=16)
    ax.set_ylabel("Fragment length (bp)", fontsize=16, labelpad=10)
    ax.set_ylim(-100, 250)

    draw_legend(leg_ax, span, 500)

    # Second y-axis sharing the x-axis for the cross-correlation overlay.
    cc_ax = ax.twinx()
    cc_ax.set_ylabel("Cross correlation $\\times$0.1", rotation=270,
                     fontsize=16, labelpad=10, va='bottom')

    scale_cc = 1
    y_origin = 0

    # presumably the index holds offsets relative to the TSS; adding the TSS
    # puts them on the same x-axis as the typhoon plot -- TODO confirm
    x = cc_nuc.index + gene.TSS
    y = cc_nuc.values * scale_cc + y_origin
    cc_ax.fill_between(x, y, y_origin, color='#28a098')

    # The small-fragment score is negated so it is drawn below the origin.
    y = -cc_small.values * scale_cc + y_origin
    cc_ax.fill_between(x, y_origin, y, color='#f28500')

    cc_ax.set_ylim(-0.1, 0.4)
    # Pin the tick positions before labelling them: the labels are the tick
    # values in units of 0.1, so they are only correct if the ticks really
    # sit at -0.1, 0.0, ..., 0.4 rather than wherever the auto-locator
    # happens to put them.
    tick_labels = np.arange(-1, 5)
    cc_ax.set_yticks(tick_labels / 10.0)
    cc_ax.set_yticklabels(tick_labels)

    write_path = '%s/%s.pdf' % (save_dir, 'example_cross_correlation')
    plt.savefig(write_path, transparent=True)
    print_fl("Wrote %s" % write_path)
def get_linkages_cc(orf_cc, gene_name, orfs):
    """Return the 'diff' cross correlation, linkages and nucleosomes of a gene.

    Args:
        orf_cc: cross-correlation table whose outer ``.loc`` level includes
            the key ``'diff'``.
        gene_name: gene identifier passed to ``get_orf``.
        orfs: ORF table passed to ``get_orf``.

    Returns:
        Tuple ``(cur_cc, linkages, nucs)``: the gene's slice of the 'diff'
        cross correlation, the result of ``find_linkages`` on its called
        nucleosomes, and the called nucleosomes themselves.
    """
    gene = get_orf(gene_name, orfs)
    gene_cc = orf_cc.loc['diff'].loc[gene.name]
    nucleosomes = call_orf_nucleosomes(orf_cc.loc['diff'], gene)
    return gene_cc, find_linkages(nucleosomes), nucleosomes
def entropy_examples():
    """Render and save the low- and high-entropy example figures.

    Lazily creates the module-level ``plotter`` when it is still ``None``,
    then draws CLF1 ("Low entropy") and HSP26 ("High entropy") with
    ``plot_entropy_example`` and saves each one into ``misc_figures_dir``.
    """
    global plotter

    all_orfs = all_orfs_TSS_PAS()
    if plotter is None:
        plotter = get_plotter()

    from src.entropy import plot_entropy_example

    # (gene, window handed to plot_entropy_example, title, output template)
    examples = (
        ('CLF1', (-460, 40), "Low entropy", '%s/low_entropy.pdf'),
        ('HSP26', (200, 700), "High entropy", '%s/high_entropy.pdf'),
    )
    for gene_name, window, title, path_template in examples:
        example_orf = get_orf(gene_name, all_orfs)
        plot_entropy_example(plotter, example_orf, window, title)
        plt.savefig(path_template % (misc_figures_dir), dpi=100)
def plot_p123(gene_name, orf_cc, plotter, sum_plotter, save_dir):
    """Plot the +1/+2/+3 nucleosome linkages of a gene.

    Writes two figures into ``save_dir``: a typhoon plot of the gene with the
    selected linkages (prefix ``typhoon_shift_``, produced by
    ``plotter.plot_gene``) and a cross-correlation heatmap
    (``cc_<gene_name>.pdf``) annotated with the linked nucleosome positions.

    Args:
        gene_name: gene to plot.
        orf_cc: cross-correlation table passed to ``get_linkages_cc``.
        plotter: typhoon plotter; its ``disable_mnase_seq`` and ``linkages``
            attributes are overwritten.
        sum_plotter: summary plotter used for the heatmap; ``set_gene`` is
            called on it.
        save_dir: output directory for both figures.
    """
    from src.nucleosome_linkages import (plot_linkages_cc, get_linkages_cc,
                                         find_p123_gene)
    from src.utils import get_orf

    # Debug switch: also draw the linkage lines on the raw cross correlation.
    plot_linkages_lines = False

    orf = get_orf(gene_name)
    cur_cc, linkages, _ = get_linkages_cc(orf_cc, gene_name, plotter.orfs)
    p1, p2, p3 = find_p123_gene(orf, linkages)
    p_links = [p1, p2, p3]

    if plot_linkages_lines:
        plot_linkages_cc(cur_cc, linkages[linkages.link.isin(p_links)])

    plotter.disable_mnase_seq = False

    # Keep only the p1-p3 linkages whose score clears the plotting threshold.
    min_cc_plotting = -1
    plot_linkages = linkages[linkages.link.isin(p_links)]
    plot_linkages = plot_linkages[
        plot_linkages.cross_correlation > min_cc_plotting]

    # typhoon plot of linkages
    plotter.linkages = plot_linkages
    fig, _time_ax, _twen_axs = plotter.plot_gene(
        gene_name, figwidth=12, padding=(500, 1000), highlight=False,
        dpi=100, save_dir=save_dir, prefix='typhoon_shift_')
    plt.close(fig)

    # get nucleosome positions by time and linkage
    # TODO: use this data structure for typhoon plot
    # DataFrame.append no longer exists in pandas >= 2.0, so the per-linkage
    # frames are concatenated instead; .assign returns a new frame, avoiding
    # a write to a chained slice of `linkages`.
    time_mids = pd.concat([
        linkages.loc[linkages.link == p_nuc, ['time', 'mid']]
        .assign(link=p_nuc)
        for p_nuc in p_links
    ])

    # plot cross correlation
    sum_plotter.set_gene(gene_name)
    write_path = "%s/cc_%s.pdf" % (save_dir, gene_name)
    # Raw string: '\i' is not a valid escape sequence in a normal literal.
    fig = sum_plotter.plot_cross_correlation_heatmap(
        show_colorbar=True,
        title=r'$\it{' + gene_name + '}$ cross correlation',
        nucs=time_mids)
    plt.savefig(write_path, transparent=False)
    plt.close(fig)
def plot_small_peaks(gene, all_peaks, plotter):
    """Scatter a gene's called peaks on top of its per-time typhoon panels.

    Args:
        gene: gene identifier resolved through ``get_orf_name``.
        all_peaks: table with ``cross_correlation``, ``orf``, ``time`` and
            ``original_mid`` columns.
        plotter: typhoon plotter; its span, chromosome and ``dpi`` are set
            before ``plot()`` is called.
    """
    all_orfs = all_orfs_TSS_PAS()
    orf = get_orf(get_orf_name(gene), all_orfs)

    # 1 kb on either side of the TSS.
    window = orf.TSS - 1000, orf.TSS + 1000
    plotter.set_span_chrom(window, orf.chr)
    plotter.dpi = 100

    _fig, axs, _tween_axs = plotter.plot()

    # One panel per entry of the module-level `times`.
    for idx, timepoint in enumerate(times):
        panel = axs[idx]
        strong = all_peaks.cross_correlation > 0.05
        same_orf = all_peaks.orf == orf.name
        at_time = all_peaks.time == timepoint
        selected = all_peaks[strong & same_orf & at_time]
        # The score is offset by 10 on the panel's y-axis.
        panel.scatter(selected.original_mid,
                      selected.cross_correlation+10.0)
def set_gene(self, gene_name):
    """Select a gene and compute its cross correlation and entropies.

    Looks the gene up in ``paper_orfs``, computes its cross correlation from
    the typhoon plotter's MNase data, and stores one entropy value per time
    point (in the order of ``self.typhoon_plotter.times``).

    Sets ``self.orf``, ``self.gene_name``, ``self.cur_cc``,
    ``self.cur_wide_counts_df`` and ``self.orf_entropies``.
    """
    selected = get_orf(gene_name, paper_orfs)
    self.orf = selected
    self.gene_name = gene_name

    mnase_data = self.typhoon_plotter.all_mnase_data

    # default cross correlation
    wide_counts, cross_corr = calculate_cross_correlation_orf(
        self.orf, mnase_data, 3000,
        self.nuc_kernel, self.sm_kernel, self.triple_kernel)
    self.cur_cc = cross_corr
    self.cur_wide_counts_df = wide_counts

    # Entropy of the selected kernel's scores, restricted to the inclusive
    # range [e_range[0], e_range[1]], for every time point.
    self.orf_entropies = [
        calc_entropy(
            cross_corr.loc[self.kernel_type].loc[selected.name].loc[timepoint][
                np.arange(self.e_range[0], self.e_range[1]+1)])
        for timepoint in self.typhoon_plotter.times
    ]
def set_gene(self, gene_name):
    """Store the ORF found for ``gene_name`` and run ``self.get_cc()``."""
    self.gene = get_orf(gene_name)
    self.get_cc()
def plot_antisense_calling(gene_name, rna_seq_pileup):
    """Plot strand-specific RNA-seq pileup around a gene with called antisense bounds.

    The top axis shows ORF annotations; the bottom axis shows the smoothed
    log2 pileup of the '+' strand (upwards) and the '-' strand (mirrored
    downwards) within 1 kb of the gene's transcript boundaries. When the gene
    has an entry in the computed antisense boundaries, its start and stop are
    marked with red vertical segments.

    Args:
        gene_name: gene to plot; looked up in the Park transcript boundaries.
        rna_seq_pileup: pileup table passed to ``filter_rna_seq_pileup``; the
            filtered result must provide ``strand``, ``position`` and
            ``pileup`` columns.
    """
    from src.rna_seq_plotter import get_smoothing_kernel
    from src.plot_utils import apply_global_settings
    from src.utils import get_orf
    from src.transcription import filter_rna_seq
    from src.transcription import filter_rna_seq_pileup
    from src.transcript_boundaries import load_park_boundaries
    from src.plot_orf_annotations import ORFAnnotationPlotter
    from config import paper_orfs
    from src.reference_data import read_sgd_orfs, read_park_TSS_PAS
    from src.datasets import read_orfs_data

    all_orfs = read_sgd_orfs()
    all_orfs = all_orfs.join(read_park_TSS_PAS()[['TSS', 'PAS']])
    orfs_plotter = ORFAnnotationPlotter(orfs=all_orfs)

    antisense_boundaries = read_orfs_data(
        '%s/antisense_boundaries_computed.csv' % rna_dir)

    park_boundaries = load_park_boundaries()
    park_boundaries = park_boundaries.join(paper_orfs[['name']])

    gene = get_orf(gene_name, park_boundaries)

    # Transcript extent padded by 1 kb on each side.
    search_2 = 1000
    plot_span = (gene.transcript_start-search_2,
                 gene.transcript_stop+search_2)
    gene_rna_seq = filter_rna_seq_pileup(rna_seq_pileup, plot_span[0],
                                         plot_span[1], gene.chr)

    apply_global_settings(30)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 5.))
    fig.tight_layout(rect=[0.1, 0, 1, 0.85])

    orfs_plotter.set_span_chrom(plot_span, gene.chr)
    orfs_plotter.plot_orf_annotations(ax1)

    # NOTE(review): '+' is always treated as "sense" here regardless of
    # gene.strand -- confirm this is intended for '-' strand genes.
    sense_data = gene_rna_seq[gene_rna_seq.strand == '+']
    antisense_data = gene_rna_seq[gene_rna_seq.strand == '-']

    # Sum the pileup per position, then log2(x + 1).
    sense_data = np.log2(sense_data.groupby('position').sum()+1).pileup
    antisense_data = np.log2(
        antisense_data.groupby('position').sum()+1).pileup

    smooth_kernel = get_smoothing_kernel(100, 20)

    # Each strand is plotted against its own positions; the two strands need
    # not cover the same set of positions, and reusing the sense index for the
    # antisense trace fails when their lengths differ.
    sense_x = sense_data.index
    antisense_x = antisense_data.index

    sense_data = np.convolve(sense_data, smooth_kernel, mode='same')
    antisense_data = np.convolve(antisense_data, smooth_kernel, mode='same')

    ax2.plot(sense_x, sense_data, color=plt.get_cmap('Blues')(0.5))
    ax2.plot(antisense_x, -antisense_data, color=plt.get_cmap('Reds')(0.5))
    ax2.set_xlim(*plot_span)
    ax2.set_ylim(-15, 15)
    ax2.axhline(0, color='black')

    if gene.name in antisense_boundaries.index:
        anti_gene = antisense_boundaries.loc[gene.name]

        # Parenthesised so the conditional chooses between two (low, high)
        # pairs; unparenthesised it binds tighter than the commas and builds
        # a 3-tuple instead.
        y_plot = (0, 20) if gene.strand == '-' else (-20, 0)

        ax2.plot([anti_gene.start, anti_gene.start],
                 [y_plot[0], y_plot[1]],
                 color='red', linewidth=2.5, solid_capstyle='butt')
        ax2.plot([anti_gene.stop, anti_gene.stop],
                 [y_plot[0], y_plot[1]],
                 color='red', linewidth=2.5, solid_capstyle='butt')

    ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 500))
    ax2.set_xticklabels([])
    _ = ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 100),
                       minor=True)

    ax2.tick_params(labelsize=14)
    ax2.set_ylabel("Sum log$_2$ (pileup+1)", fontsize=15)
    ax2.set_xlabel("Position (bp)", fontsize=15)
    ax1.set_title("Calling antisense transcripts", fontsize=26)

    # NOTE(review): fixed genomic coordinates, independent of gene_name --
    # these look like markers left over from one specific example; confirm.
    ax2.axvline(383344)
    ax2.axvline(384114)