Exemplo n.º 1
0
def plot_example_cross(plotter, save_dir):
    """Plot an example typhoon plot for APJ1 overlaid with cross correlation.

    Draws the time-0 MNase typhoon plot for a 1 kb window centred on the
    APJ1 TSS, a legend panel below it, and on a twin y-axis the time-0
    'nucleosomal' cross correlation (filled upwards) and 'small' cross
    correlation (filled downwards). The figure is written to
    ``<save_dir>/example_cross_correlation.pdf``.

    Args:
        plotter: object providing ``orfs``, ``all_mnase_data``,
            ``set_span_chrom`` and ``plot_typhoon_time``.
        save_dir: directory the PDF is written into.
    """
    from src.chromatin import filter_mnase
    from src.plot_utils import apply_global_settings
    from config import cross_corr_sense_path
    from src.utils import get_orf

    orf_cc = pd.read_hdf(cross_corr_sense_path, 'cross_correlation')
    orfs = plotter.orfs

    gene = get_orf('APJ1', orfs)
    span = (gene.TSS - 500, gene.TSS + 500)
    plotter.set_span_chrom(span, gene.chr)

    # Cross correlation rows for this gene at time 0.0, one per kernel type.
    cc_nuc = orf_cc.loc['nucleosomal'].loc[gene.name].loc[0.0]
    cc_small = orf_cc.loc['small'].loc[gene.name].loc[0.0]

    data = filter_mnase(plotter.all_mnase_data,
                        span[0],
                        span[1],
                        chrom=gene.chr,
                        time=0)

    fig, (ax, leg_ax) = plt.subplots(2, 1, figsize=(5, 6))
    fig.tight_layout(rect=[0.1, 0.1, 0.92, 0.945])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.plot_typhoon_time(ax, data, 0, scale_z=True)
    ax.set_xlim(*span)
    ax.set_xticks(np.arange(span[0], span[1], 500))
    ax.set_xticks(np.arange(span[0], span[1], 100), minor=True)

    ax.set_xlabel("Position (bp)", fontsize=16)
    ax.set_ylabel("Fragment length (bp)", fontsize=16, labelpad=10)
    ax.set_ylim(-100, 250)

    draw_legend(leg_ax, span, 500)

    cc_ax = ax.twinx()
    cc_ax.set_ylabel("Cross correlation $\\times$0.1",
                     rotation=270,
                     fontsize=16,
                     labelpad=10,
                     va='bottom')

    scale_cc = 1
    y_origin = 0
    # The cross correlation index is added to the TSS to get plot x positions.
    x = cc_nuc.index + gene.TSS
    y = cc_nuc.values * scale_cc + y_origin
    cc_ax.fill_between(x, y, y_origin, color='#28a098')

    # The 'small' signal is mirrored below the origin.
    y = -cc_small.values * scale_cc + y_origin
    cc_ax.fill_between(x, y_origin, y, color='#f28500')

    # Pin the tick locations before relabelling them: set_yticklabels on its
    # own attaches the labels to whatever ticks the automatic locator picks,
    # which only lines up if it happens to produce exactly these six ticks.
    cc_ticks = np.arange(-1, 5)
    cc_ax.set_yticks(cc_ticks * 0.1)
    cc_ax.set_yticklabels(cc_ticks)
    cc_ax.set_ylim(-0.1, 0.4)

    write_path = '%s/%s.pdf' % (save_dir, 'example_cross_correlation')
    plt.savefig(write_path, transparent=True)
    print_fl("Wrote %s" % write_path)
def get_linkages_cc(orf_cc, gene_name, orfs):
    """Collect the 'diff' cross correlation, linkages and nucleosomes of a gene.

    Args:
        orf_cc: cross correlation table with a 'diff' entry on its outer index.
        gene_name: name used to look the gene up with ``get_orf``.
        orfs: ORF table passed to ``get_orf``.

    Returns:
        A ``(cross_correlation, linkages, nucleosomes)`` tuple, where the
        cross correlation is the 'diff' rows of the gene, the nucleosomes are
        the result of ``call_orf_nucleosomes`` and the linkages are the result
        of ``find_linkages`` on those nucleosomes.
    """
    target = get_orf(gene_name, orfs)

    gene_cc = orf_cc.loc['diff'].loc[target.name]
    called_nucs = call_orf_nucleosomes(orf_cc.loc['diff'], target)

    return gene_cc, find_linkages(called_nucs), called_nucs
Exemplo n.º 3
0
def entropy_examples():
    """Save the low- and high-entropy example figures.

    Plots CLF1 over (-460, 40) titled "Low entropy" and HSP26 over
    (200, 700) titled "High entropy", saving each as a PDF in
    ``misc_figures_dir``. The module-level ``plotter`` is created with
    ``get_plotter()`` on first use.
    """
    all_orfs = all_orfs_TSS_PAS()

    global plotter
    if plotter is None:
        plotter = get_plotter()

    from src.entropy import plot_entropy_example

    # (gene, window, title, output path template) for each example figure.
    examples = (
        ('CLF1', (-460, 40), "Low entropy", '%s/low_entropy.pdf'),
        ('HSP26', (200, 700), "High entropy", '%s/high_entropy.pdf'),
    )

    for example_gene, window, title, path_template in examples:
        orf = get_orf(example_gene, all_orfs)
        plot_entropy_example(plotter, orf, window, title)
        plt.savefig(path_template % (misc_figures_dir), dpi=100)
def plot_p123(gene_name, orf_cc, plotter, sum_plotter, save_dir):
    """Plot the typhoon figure and cross correlation heatmap for a gene's
    p1/p2/p3 linkages.

    The three linkages returned by ``find_p123_gene`` are drawn on the
    typhoon plot written by ``plotter.plot_gene`` (prefix
    ``typhoon_shift_``), and their per-time midpoints are passed as ``nucs``
    to the cross correlation heatmap saved as ``<save_dir>/cc_<gene_name>.pdf``.

    Args:
        gene_name: gene to plot.
        orf_cc: cross correlation table passed to ``get_linkages_cc``.
        plotter: typhoon plotter; its ``linkages`` and ``disable_mnase_seq``
            attributes are overwritten.
        sum_plotter: plotter providing ``set_gene`` and
            ``plot_cross_correlation_heatmap``.
        save_dir: directory both figures are written into.
    """

    from src.nucleosome_linkages import plot_linkages_cc, get_linkages_cc, find_p123_gene
    from src.utils import get_orf

    # Debug toggle: also draw the selected linkages on the cross correlation.
    plot_linkages_lines = False
    orf = get_orf(gene_name)
    cur_cc, linkages, _ = get_linkages_cc(orf_cc, gene_name, plotter.orfs)
    p1, p2, p3 = find_p123_gene(orf, linkages)
    p_nucs = [p1, p2, p3]

    selected_linkages = linkages[linkages.link.isin(p_nucs)]

    if plot_linkages_lines:
        plot_linkages_cc(cur_cc, selected_linkages)

    plotter.disable_mnase_seq = False

    min_cc_plotting = -1
    plot_linkages = selected_linkages[
        selected_linkages.cross_correlation > min_cc_plotting]

    # typhoon plot of linkages
    plotter.linkages = plot_linkages
    fig, _, _ = plotter.plot_gene(gene_name,
                                  figwidth=12,
                                  padding=(500, 1000),
                                  highlight=False,
                                  dpi=100,
                                  save_dir=save_dir,
                                  prefix='typhoon_shift_')
    plt.close(fig)

    # get nucleosome positions by time and linkage
    # TODO: use this data structure for typhoon plot
    # pd.concat replaces DataFrame.append (removed in pandas 2.0); .assign
    # returns a new frame, so no column is written onto a filtered slice.
    time_mids = pd.concat([
        linkages.loc[linkages.link == p_nuc, ['time', 'mid']].assign(link=p_nuc)
        for p_nuc in p_nucs
    ])

    # plot cross correlation
    sum_plotter.set_gene(gene_name)
    write_path = "%s/cc_%s.pdf" % (save_dir, gene_name)
    fig = sum_plotter.plot_cross_correlation_heatmap(
        show_colorbar=True,
        # Raw string: '\i' is not a valid escape sequence in a normal literal.
        title=r'$\it{' + gene_name + '}$ cross correlation',
        nucs=time_mids)
    plt.savefig(write_path, transparent=False)
    plt.close(fig)
def plot_small_peaks(gene, all_peaks, plotter):
    """Scatter a gene's peaks onto each time panel of its typhoon plot.

    The plot spans 1000 bp either side of the TSS. For every entry of the
    module-level ``times``, peaks of this ORF at that time with cross
    correlation above 0.05 are drawn at ``original_mid`` with their cross
    correlation offset by 10.0 on the y-axis.

    Args:
        gene: gene identifier resolved with ``get_orf_name``.
        all_peaks: table with ``cross_correlation``, ``orf``, ``time`` and
            ``original_mid`` columns.
        plotter: plotter providing ``set_span_chrom``, ``dpi`` and ``plot``.
    """
    all_orfs = all_orfs_TSS_PAS()
    orf = get_orf(get_orf_name(gene), all_orfs)

    window = orf.TSS - 1000, orf.TSS + 1000
    plotter.set_span_chrom(window, orf.chr)
    plotter.dpi = 100
    _, axs, _ = plotter.plot()

    for idx, timepoint in enumerate(times):
        panel = axs[idx]

        above_threshold = all_peaks.cross_correlation > 0.05
        in_orf = all_peaks.orf == orf.name
        at_time = all_peaks.time == timepoint

        peaks = all_peaks[above_threshold & in_orf & at_time]
        panel.scatter(peaks.original_mid, peaks.cross_correlation + 10.0)
Exemplo n.º 6
0
    def set_gene(self, gene_name):
        """Select a gene and compute its cross correlation and entropies.

        Stores the ORF looked up in ``paper_orfs``, the outputs of
        ``calculate_cross_correlation_orf`` over a window of 3000, and one
        entropy value per time point of the typhoon plotter.

        Args:
            gene_name: name of the gene to select.
        """
        orf = get_orf(gene_name, paper_orfs)
        self.orf = orf
        self.gene_name = gene_name

        # default cross correlation
        wide_counts, cross_corr = calculate_cross_correlation_orf(
            self.orf,
            self.typhoon_plotter.all_mnase_data,
            3000,
            self.nuc_kernel,
            self.sm_kernel,
            self.triple_kernel)
        self.cur_cc = cross_corr
        self.cur_wide_counts_df = wide_counts

        def entropy_at(timepoint):
            # Entropy of this kernel's cross correlation inside e_range
            # (both ends included) at one time point.
            profile = cross_corr.loc[self.kernel_type].loc[orf.name].loc[timepoint]
            window = np.arange(self.e_range[0], self.e_range[1] + 1)
            return calc_entropy(profile[window])

        self.orf_entropies = [
            entropy_at(timepoint) for timepoint in self.typhoon_plotter.times
        ]
Exemplo n.º 7
0
 def set_gene(self, gene_name):
     """Store the ORF for ``gene_name`` on ``self.gene`` and call ``get_cc``."""
     self.gene = get_orf(gene_name)
     self.get_cc()
def plot_antisense_calling(gene_name, rna_seq_pileup):
    """Plot stranded RNA-seq pileup around a gene with its antisense boundaries.

    Builds a two-panel figure: ORF annotations on top and, below, the
    smoothed log2(pileup + 1) of the '+' strand (drawn upwards) and of the
    '-' strand (drawn downwards) over the gene's transcript extended by
    1000 bp on each side. When the gene has an entry in
    ``antisense_boundaries_computed.csv``, its start and stop are marked with
    red vertical segments on the side opposite to the gene's own strand.

    The figure is left as the current matplotlib figure; it is neither saved
    nor returned.

    Args:
        gene_name: gene looked up in the Park boundaries table via ``get_orf``.
        rna_seq_pileup: pileup table passed to ``filter_rna_seq_pileup``; the
            filtered result is read through its ``strand``, ``position`` and
            ``pileup`` columns.
    """

    from src.rna_seq_plotter import get_smoothing_kernel
    from src.plot_utils import apply_global_settings
    from src.utils import get_orf
    from src.transcription import filter_rna_seq_pileup
    from src.transcript_boundaries import load_park_boundaries
    from src.plot_orf_annotations import ORFAnnotationPlotter
    from config import paper_orfs
    from src.reference_data import read_sgd_orfs, read_park_TSS_PAS
    from src.datasets import read_orfs_data

    all_orfs = read_sgd_orfs()
    all_orfs = all_orfs.join(read_park_TSS_PAS()[['TSS', 'PAS']])

    orfs_plotter = ORFAnnotationPlotter(orfs=all_orfs)

    antisense_boundaries = read_orfs_data('%s/antisense_boundaries_computed.csv' % rna_dir)

    park_boundaries = load_park_boundaries()
    park_boundaries = park_boundaries.join(paper_orfs[['name']])

    gene = get_orf(gene_name, park_boundaries)

    search_2 = 1000
    plot_span = gene.transcript_start - search_2, gene.transcript_stop + search_2
    gene_rna_seq = filter_rna_seq_pileup(rna_seq_pileup,
                                         plot_span[0], plot_span[1], gene.chr)

    apply_global_settings(30)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 5.))
    fig.tight_layout(rect=[0.1, 0, 1, 0.85])

    orfs_plotter.set_span_chrom(plot_span, gene.chr)
    orfs_plotter.plot_orf_annotations(ax1)

    # Sum only the pileup column per position: summing every column would
    # also try to aggregate the string 'strand' column.
    plus_reads = gene_rna_seq[gene_rna_seq.strand == '+']
    minus_reads = gene_rna_seq[gene_rna_seq.strand == '-']
    plus_log = np.log2(plus_reads.groupby('position')['pileup'].sum() + 1)
    minus_log = np.log2(minus_reads.groupby('position')['pileup'].sum() + 1)

    smooth_kernel = get_smoothing_kernel(100, 20)

    plus_smooth = np.convolve(plus_log, smooth_kernel, mode='same')
    minus_smooth = np.convolve(minus_log, smooth_kernel, mode='same')

    # Each strand is drawn against its own positions; the two strands need
    # not cover the same set of positions.
    ax2.plot(plus_log.index, plus_smooth, color=plt.get_cmap('Blues')(0.5))
    ax2.plot(minus_log.index, -minus_smooth, color=plt.get_cmap('Reds')(0.5))
    ax2.set_xlim(*plot_span)
    ax2.set_ylim(-15, 15)
    ax2.axhline(0, color='black')

    if gene.name in antisense_boundaries.index:
        anti_gene = antisense_boundaries.loc[gene.name]

        # Parenthesised so the conditional selects between two (low, high)
        # pairs; unparenthesised it binds only to the middle element and
        # builds a 3-tuple.
        y_plot = (0, 20) if gene.strand == '-' else (-20, 0)

        ax2.plot([anti_gene.start, anti_gene.start],
                 [y_plot[0], y_plot[1]], color='red', linewidth=2.5, solid_capstyle='butt')
        ax2.plot([anti_gene.stop, anti_gene.stop],
                 [y_plot[0], y_plot[1]], color='red', linewidth=2.5, solid_capstyle='butt')

    ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 500))
    ax2.set_xticklabels([])
    _ = ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 100), minor=True)

    ax2.tick_params(labelsize=14)
    ax2.set_ylabel("Sum log$_2$ (pileup+1)", fontsize=15)
    ax2.set_xlabel("Position (bp)", fontsize=15)

    ax1.set_title("Calling antisense transcripts", fontsize=26)

    # NOTE(review): fixed genomic coordinates, independent of gene_name —
    # presumably markers for one specific example gene; confirm whether
    # these should be parameters or removed.
    ax2.axvline(383344)
    ax2.axvline(384114)