def plot_OD_curve():
    """Plot the cadmium growth curves and save them as a PDF.

    Reads ``data/070513_cadmium.csv``, indexes it by its ``Hour`` column and
    draws one line for every column whose name starts with ``'ave '``
    (iterated in reverse column order). The figure is written to
    ``<save_dir>/cadmium_growth.pdf``.
    """
    from src.plot_utils import apply_global_settings
    from src.colors import parula

    growth = pd.read_csv('data/070513_cadmium.csv').set_index('Hour')

    apply_global_settings()
    fig = plt.figure(figsize=(8, 6))

    averaged = growth.columns[growth.columns.str.startswith('ave ')]
    curve_cols = list(reversed(averaged))
    cmap = plt.get_cmap('magma_r')

    for rank, col_name in enumerate(curve_cols):
        # The second space-separated token of the column name is the value
        # shown in the legend.
        legend_text = "%s uM" % (col_name.split(' ')[1])
        # Spread the curves over the [0.1, 1.0) range of the colormap.
        shade = cmap(0.1 + 0.9 * (rank * 1. / len(curve_cols)))
        plt.plot(growth.index, growth[col_name], label=legend_text,
                 color=shade, lw=2)

    plt.xlim(0, growth.index.max())
    plt.ylim(0, 1.7)
    plt.legend(ncol=4)

    plt.suptitle("Growth curve, cadmium", fontsize=24)
    plt.ylabel("Optical density, OD$_{600}$", fontsize=18)
    plt.xlabel("Time, hours", fontsize=18)

    out_path = '%s/cadmium_growth.pdf' % (save_dir)
    plt.savefig(out_path, transparent=True, dpi=scatter_dpi)
def plot_compare(data, markers, colors, fill_styles, metric='r2', rename=None,
                 show_legend=False):
    """Plot one marker series per model across the time points.

    Parameters:
        data: frame whose columns are model names; each column is plotted
            against the time-point positions.
        markers, colors, fill_styles: per-model marker, color and fill style,
            indexed in column order.
        metric: ``'mse'`` selects the MSE axis setup; any other value selects
            the R^2 axis setup.
        rename: optional mapping from model name to the legend label to show
            instead. ``None`` (the default) means no renaming.
        show_legend: draw the legend outside the axes when true.
    """
    plot_utils.apply_global_settings()

    # A fresh dict per call avoids sharing a mutable default between calls.
    rename = {} if rename is None else rename

    model_names = data.columns
    fig, ax = plt.subplots(figsize=(9, 4))
    fig.tight_layout(rect=[0.05, 0.05, 0.65, 0.9])

    times = [0, 7.5, 15, 30, 60, 120]
    x = np.arange(len(times))
    spacing = 0.09
    n_models = len(model_names)

    for i, model_name in enumerate(model_names):
        label = rename.get(model_name, model_name)
        # Offset each model horizontally so the group is centred on the tick.
        x_pos = x - spacing*(n_models-1)/2. + spacing*i
        ax.plot(x_pos, data[model_name], marker=markers[i], color=colors[i],
                label=label.replace('Promoter occupancy',
                                    'Small fragment occupancy'),
                markersize=7, alpha=1., fillstyle=fill_styles[i],
                linewidth=0, zorder=10)

    if metric == 'mse':
        ax.set_ylim(1, 8.5)
        ax.set_ylabel('MSE', fontsize=12)
        ax.set_title("Model evaluation, MSE")
        grid_levels = np.arange(0, 10, 1)
    else:
        ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
        ax.set_ylim(-0.05, 1.0)
        ax.set_ylabel('Coefficient of determination, $R^2$', fontsize=16)
        ax.set_title("Model evaluation, $R^2$", fontsize=20)
        grid_levels = np.arange(0, 1.0, 0.1)

    # Thin horizontal guide lines behind the markers.
    for level in grid_levels:
        ax.axhline(y=level, linewidth=0.1, linestyle='solid',
                   color='#303030', zorder=1)

    ax.set_xticks(np.arange(0, len(times)))
    ax.set_xticklabels(['%s\'' % t for t in times])
    # The visible range starts at 0.5, so the group at position 0 is cropped.
    ax.set_xlim(.5, 5.5)
    ax.tick_params('x', labelsize=16, length=0, width=0, pad=10)

    if show_legend:
        ax.legend(bbox_to_anchor=(1.02, 1.), fontsize=14, frameon=False)

    # Vertical separators between neighbouring time-point groups.
    # NOTE(review): nesting reconstructed from flattened source; assumed to
    # run regardless of show_legend -- confirm.
    for i in range(len(times)):
        ax.axvline(x=i+0.5, linewidth=1.0, color='#303030')
def plot_colorbars(small_peaks, write_path=None):
    """Draw two stacked colorbar legends.

    The upper bar is titled for transcription rate and the lower one for
    binding occupancy. Both use ``small_peaks.im_scale`` as their limit; the
    lower bar's scale is ``1 / small_peaks.bin_scale``.

    ``write_path`` is accepted but not used by this function.
    """
    from src.chromatin_heatmaps import _make_fake_cbar
    from src import plot_utils

    apply_global_settings(linewidth=2)

    fig, axs = plt.subplots(2, 1, figsize=(8, 2))
    plt.subplots_adjust(hspace=0.0, wspace=0.0)
    fig.subplots_adjust(left=0.5)
    fig.patch.set_alpha(0.0)

    # (title, divisor for the bar scale, format string passed as str_format)
    bar_specs = [
        ('Log$_2$ fold-change\ntranscription rate', 1, '%.0f'),
        ('Log$_2$ fold-change\nbinding occupancy',
         small_peaks.bin_scale, '%.2f'),
    ]

    for bar_ax, (bar_title, divisor, tick_format) in zip(axs, bar_specs):
        _make_fake_cbar(bar_ax, small_peaks.im_scale, bar_title,
                        scale=1. / divisor, str_format=tick_format)
        plot_utils.format_spines(bar_ax, lw=1.2)
def plot_cluster_lines(chrom_clustering):
    """Draw one row of three panels for every cluster in the clustering.

    Each row is filled by ``chrom_clustering.plot_cluster``; only the last row
    is asked to show x labels. The figure background is transparent while the
    individual panels are given an opaque white background.
    """
    apply_global_settings(titlepad=15)

    cluster_data = chrom_clustering.hc.clustered_data
    clusters = cluster_data.cluster.unique()
    n_clusters = len(clusters)

    # squeeze=False keeps ``axs`` two-dimensional even when there is a single
    # cluster, so ``axs[i]`` is always a row of three axes.
    fig, axs = plt.subplots(n_clusters, 3,
                            figsize=(3 * 2, n_clusters * 2.2),
                            squeeze=False)
    fig.patch.set_alpha(0.0)
    fig.tight_layout(rect=[0.05, 0.05, 0.95, 0.95])
    plt.subplots_adjust(hspace=0.75, wspace=0.25)

    for i, cluster in enumerate(clusters):
        axs_row = axs[i]
        title = 'Cluster %d' % cluster
        chrom_clustering.plot_cluster(axs_row[0], axs_row[1], axs_row[2],
                                      cluster, title=title,
                                      xlab=(i == n_clusters - 1))

        # Panels stay opaque on top of the transparent figure background.
        for ax in axs_row:
            ax.patch.set_alpha(1.0)
            ax.patch.set_facecolor('white')
def main():
    """Run every step that produces the reviewer-material figures."""
    banner = (
        "*******************************",
        "* 6 Reviewer Materials *",
        "*******************************",
    )
    for banner_line in banner:
        print_fl(banner_line)

    print_preamble()
    mkdirs_safe([save_dir])
    plot_utils.apply_global_settings()

    # plots for shift edge analysis
    shift_edge_analysis.main()

    # additional scatter plots
    scatters()
    xrate_vs_TPM()

    # danpos
    danpos()

    # OD curve
    plot_OD_curve()
def plot_tf_regulon_heatmap(tf, small_peaks, is_high=True):
    """Draw a heatmap of the ``times`` columns for the regulon of ``tf``.

    Rows are the de-duplicated ``small_peaks.regulon_xrate`` entries for
    ``tf``, ordered by decreasing row mean. Row labels are only drawn when
    there are fewer than 50 rows. ``is_high`` is accepted but unused.

    Returns:
        The created figure.
    """
    apply_global_settings(titlepad=15)

    all_regulons = small_peaks.regulon_xrate
    tf_rows = all_regulons[all_regulons.tf == tf]
    heat = tf_rows.set_index('orf_name')[times].drop_duplicates()

    # Highest mean first.
    row_order = heat[times].mean(axis=1).sort_values(ascending=False).index
    heat = heat.loc[row_order]

    fig, ax = plt.subplots(1, 1, figsize=(4, 10))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    color_limit = small_peaks.im_scale
    ax.imshow(heat, vmin=-color_limit, vmax=color_limit,
              cmap='RdBu_r', aspect=1.)

    n_rows = len(heat)
    if n_rows >= 50:
        ax.set_yticks([])
    else:
        gene_names = paper_orfs[['name']].loc[heat.index]['name']
        ax.set_yticks(np.arange(n_rows))
        ax.set_yticklabels(gene_names)

    ax.set_title("%s regulon\ntranscription" % tf)
    ax.tick_params(axis='y', length=0, pad=4, labelsize=10)
    ax.set_xticks([])

    return fig
def plot_antisense(self, antisense=None):
    """Plot the quartile range and median of antisense values per cluster.

    For every cluster (numbered from 1) and every time point, a vertical bar
    from the 25% to the 75% quantile is drawn together with a diamond at the
    median. When ``antisense`` is not given it is loaded from
    ``<rna_dir>/antisense_TPM.csv``.
    """
    apply_global_settings(titlepad=45)
    cluster_data = self.hc.clustered_data

    from src.datasets import read_orfs_data
    if antisense is None:
        antisense = read_orfs_data('%s/antisense_TPM.csv' % rna_dir)

    data = antisense.loc[cluster_data.index].join(cluster_data[['cluster']])

    fig, ax = plt.subplots(figsize=(7, 4))
    fig.tight_layout(rect=[0.05, 0.1, 0.95, 0.8])

    times = [0.0, 7.5, 15, 30, 60, 120]
    num_clusters = len(data.cluster.unique())
    spacing = 0.13

    for cluster_id in range(1, num_clusters + 1):
        in_cluster = data[data.cluster == cluster_id][times]

        for slot, time in enumerate(times):
            values = in_cluster[time].values
            q75 = np.quantile(values, 0.75)
            q25 = np.quantile(values, 0.25)
            median = np.median(values)

            # Time points are laid out side by side around the cluster tick.
            x = cluster_id + spacing * slot - spacing * 2.5

            ax.plot([x, x], [q75, q25], linewidth=3., color='#FF5C5C',
                    alpha=1, solid_capstyle='butt')
            ax.scatter(x, median, s=6, marker='D', color='black', zorder=10)

    ax.set_xticks(np.arange(num_clusters + 1))
    ax.set_xlim(0.5, num_clusters + 0.5)

    ax.tick_params(axis='x', length=0, pad=10, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)

    ax.set_ylabel('Transcripts per million', fontsize=18)
    ax.set_xlabel('Cluster', fontsize=18)
    ax.set_title('Antisense transcripts per cluster', fontsize=23)

    # Light separators between neighbouring clusters.
    for boundary in np.arange(1, num_clusters):
        ax.axvline(boundary + 0.5, color='#d0d0d0', linewidth=1)
def plot_frag_len_dist(mnase_data,
                       title="Subsampled, merged fragment lengths",
                       normalize=True, plt_legend=False):
    """Plot the fragment-length distribution for each time point.

    Rows of ``mnase_data`` are counted per ``(time, length)`` pair. For every
    entry of ``times`` the most frequent length is printed and the counts are
    drawn as a line (divided by their sum when ``normalize`` is true).
    """
    from config import times
    from src.plot_utils import plot_density, apply_global_settings

    per_length = mnase_data.groupby(['time', 'length']).count()[['chr']]
    lengths = per_length.rename(columns={'chr': 'count'})

    from src.timer import Timer
    # NOTE(review): the timer is created but never read here; kept in case
    # constructing it matters -- confirm.
    timer = Timer()

    apply_global_settings()

    fig, ax = plt.subplots(figsize=(6, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.825, 0.85])

    cmap = plt.get_cmap('magma_r')

    for position, time in enumerate(times):
        line_color = cmap(float(position) * 0.8 / 5. + 1. / 5)
        counts = lengths.loc[time]

        max_len = counts.idxmax().values[0]
        print("Most frequent length for %s: %d" % (str(time), max_len))

        if normalize:
            counts = counts / counts.sum()

        ax.plot(counts, color=line_color, label="%s min" % str(time))

    ax.set_title(title, fontsize=20)
    ax.set_xlabel('Fragment length (bp)')
    ax.set_ylabel('Density')
    ax.set_ylim(0, 0.02)
    ax.set_xlim(0, 250)

    if plt_legend:
        ax.legend(bbox_to_anchor=(1.35, 1.), frameon=False)
def plot_nuc_calls_cc():
    """Plot the summed nucleosomal cross correlation at time 0 with its peaks.

    The first peak is the position with the largest column sum; the second
    and third are the largest sums searched from 80 positions past the
    previous peak up to position 500. The two peak distances are logged and
    each peak is marked with a labelled vertical line.
    """
    from src.plot_utils import apply_global_settings
    from config import cross_corr_sense_path
    import matplotlib.patheffects as path_effects

    cross = pd.read_hdf(cross_corr_sense_path, 'cross_correlation')

    time = 0
    nucleosomal = cross.loc['nucleosomal'].query('time == %s' % str(time))
    value_cols = nucleosomal.columns
    cur_cross = nucleosomal.reset_index().set_index('orf_name')[value_cols]

    totals = cur_cross.sum()

    peak_1 = totals.idxmax()
    peak_2 = cur_cross[np.arange(peak_1 + 80, 500)].sum().idxmax()
    peak_3 = cur_cross[np.arange(peak_2 + 80, 500)].sum().idxmax()
    peaks = [peak_1, peak_2, peak_3]

    print_fl("Computed nucleosome spacing:", log=True)
    print_fl("+1, +2 distance: %0.0f" % (peak_2 - peak_1), log=True)
    print_fl("+2, +3 distance: %0.0f" % (peak_3 - peak_2), log=True)

    apply_global_settings()

    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    ax.plot(cur_cross.sum())

    for peak in peaks:
        ax.axvline(peak, linestyle='solid', color='red', alpha=0.25, lw=3)
        peak_label = ax.text(peak, 500, "TSS+%d" % peak, ha='center',
                             fontsize=12)
        # White outline stroke drawn under the label text.
        peak_label.set_path_effects([
            path_effects.Stroke(linewidth=10, foreground='white'),
            path_effects.Normal()
        ])

    tick_positions = np.arange(-200, 800, 100)
    ax.set_xticks(tick_positions)

    tick_labels = []
    for val in tick_positions:
        if val < 0:
            tick_labels.append(str(val))
        else:
            tick_labels.append('+%d' % val)
    # Index 2 is position 0.
    tick_labels[2] = 'TSS'
    ax.set_xticklabels(tick_labels)

    ax.set_title("Gene body nucleosomes, 0 min", fontsize=24)
    ax.set_ylim(0, 600)
    ax.set_xlim(-200, 600)
    ax.set_xlabel('Position (bp)')
    ax.set_ylabel(
        'Cumulative nucleosome\ncross correlation score across genes')
def plot_half_lifes(self):
    """Plot the quartile range and median half life for every cluster.

    Half lives are read from ``data/half_life.csv`` and joined to the cluster
    assignment in ``self.hc.clustered_data``. For each cluster (numbered from
    1) a vertical bar spans the 25% to 75% quantile and a diamond marks the
    median. The number of clusters is taken from the data instead of being
    fixed, matching how ``plot_antisense`` handles it.
    """
    apply_global_settings(titlepad=20)
    cluster_data = self.hc.clustered_data

    from src.datasets import read_orfs_data
    half_lifes = read_orfs_data('data/half_life.csv')[['half_life']]

    data = half_lifes.loc[cluster_data.index]
    data = data.join(cluster_data[['cluster']])

    fig, ax = plt.subplots(figsize=(7, 5))
    fig.tight_layout(rect=[0.05, 0.1, 0.95, 0.8])

    num_clusters = len(data.cluster.unique())
    spacing = 0.13

    for c in range(1, num_clusters + 1):
        cur = data[data.cluster == c].half_life
        q75 = np.quantile(cur, 0.75)
        q25 = np.quantile(cur, 0.25)
        median = np.median(cur)

        # Same horizontal slot arithmetic as the original single-column layout.
        x = c + spacing * 3 - spacing * 2.5

        ax.plot([x, x], [q75, q25], linewidth=6., color='#abd1fc',
                alpha=1, solid_capstyle='butt')
        ax.scatter(x, median, s=16, marker='D', color='black', zorder=10)

    ax.set_xticks(np.arange(num_clusters + 1))
    ax.set_xlim(0.5, num_clusters + 0.5)
    ax.set_ylim(0, 50)

    ax.tick_params(axis='x', length=0, pad=10, labelsize=16)
    ax.tick_params(axis='y', labelsize=16)

    ax.set_ylabel('Half life, min', fontsize=18)
    ax.set_xlabel('Cluster', fontsize=18)
    ax.set_title('Half lifes per cluster', fontsize=30)

    # Separators after every cluster, including the right-hand edge.
    for boundary in np.arange(1, num_clusters + 1):
        ax.axvline(boundary + 0.5, color='#d0d0d0', linewidth=1)
def plot_antisense_lengths():
    """Plot a histogram of ``stop - start`` for the computed antisense
    boundaries read from ``<rna_dir>/antisense_boundaries_computed.csv``.

    Entries whose difference is missing are dropped; the remaining count is
    shown in the title.
    """
    boundaries_path = '%s/antisense_boundaries_computed.csv' % rna_dir
    antisense_boundaries = read_orfs_data(boundaries_path)

    from src.plot_utils import apply_global_settings
    apply_global_settings()

    fig, ax = plt.subplots(figsize=(4.5, 3))
    fig.tight_layout(rect=[0.05, 0.05, 0.95, 0.9])

    spans = antisense_boundaries.stop - antisense_boundaries.start
    antisense_lengths = spans.dropna()

    ax.hist(antisense_lengths, bins=25, linewidth=1, edgecolor='white')

    n_transcripts = len(antisense_lengths)
    ax.set_title("Antisense transcript lengths, N=%d" % n_transcripts,
                 fontsize=18)
    ax.set_xlabel("Length (bp)")
    ax.set_ylabel("# of genes")
def plot_association(data, key, name, color):
    """Plot ``key`` sorted ascending next to binned antisense distributions.

    The left panel scatters the sorted ``data[key]`` values against their
    rank. The right panel splits the sorted rows into ten equally sized
    consecutive bins and, for each, draws a violin of the
    ``'120.0_antisense_x_logfold'`` column with a diamond at its median.

    Parameters:
        data: frame containing ``key`` and ``'120.0_antisense_x_logfold'``.
        key: column to sort by and show in the left panel.
        name: text used in the axis labels and titles.
        color: color of the left-panel scatter points.
    """
    apply_global_settings(titlepad=10)

    prom_data = data.sort_values(key)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 8))

    ax1.scatter(prom_data[key], np.arange(len(prom_data)), s=10, color=color)
    ax1.set_xlim(-7, 7)
    ax1.set_ylim(0, len(prom_data))
    ax1.axvline(0, color='black', linestyle='solid', linewidth=1.5, zorder=1)
    ax1.set_yticks([])
    ax1.set_xlabel('$\\Delta$ %s z-score' % (name[0:1].upper() + name[1:]))
    ax1.axvline(1, color='red', linestyle='solid', linewidth=1.5, zorder=1,
                alpha=0.5)

    num_q = 10
    # Floor division: the bin size is used as a slice bound below and must be
    # an integer (true division would give a float under Python 3).
    n_q = len(prom_data) // num_q

    for q in range(num_q):
        anti = prom_data['120.0_antisense_x_logfold'][(q*n_q):((q+1)*n_q)]
        y_center = n_q*q + n_q/2.

        plot_violin(anti, ax=ax2, bw=0.3, arange=(-10, 10, 0.05),
                    y_offset=y_center, mult=500., color='#c3abdb')

        # White filled diamond with a black outline on top marks the median.
        plt.plot(np.median(anti), y_center, markersize=7, color='white',
                 marker='D', zorder=4)
        plt.plot(np.median(anti), y_center, markersize=7, color='black',
                 fillstyle='none', marker='D', zorder=4)

    ax2.set_ylim(0, len(prom_data))
    ax2.set_xlim(-7, 7)
    ax2.set_xlabel('Log$_2 $ fold-change antisense transcripts')
    ax2.axvline(0, color='black', linestyle='solid', linewidth=1.5, zorder=2)
    ax2.set_yticks([])

    plt.suptitle('Antisense %s\n0-120 min' % name, fontsize=16)

    # Escaped backslash: same text as before without the invalid "\D" escape.
    ax1.set_title("Sorted $\\Delta$ %s" % name)
    ax2.set_title("Log$_2 $ fold-change Antisense transcripts")
def main():
    """Print the section banner and run every figure-generating step in order."""
    for banner_line in ("*********************",
                        "* 5 Figures *",
                        "*********************"):
        print_fl(banner_line)

    print_preamble()
    plot_utils.apply_global_settings()

    # (header printed before the step, step to run), in execution order.
    steps = (
        ("\n------- Typhoon ----------\n", typhoon_plots),
        ("\n------- Line/Cross Plots ----------\n", summary_plots),
        ("\n------- Locus plots ----------\n", locus_plots),
        ("\n------- GO Plots ----------\n", go_bar_plots),
        ("\n------- Heatmap Plots ----------\n", plot_heatmaps),
        ("\n------- Regression Plots ----------\n", regression_plots),
        ("\n------- Antisense Plots ----------\n", antisense_plots),
        ("\n------- TF Plots ----------\n", tf_plots),
        ("\n------- Other ----------\n", misc_plots),
        ("\n------- Entropy ----------\n", entropy_examples),
        ("\n--------- Shift -----------\n", shift_plots),
    )

    for header, run_step in steps:
        print_fl(header)
        run_step()
def draw_example_mnase_seq(plotter, save_dir):
    """Draw the example MNase-seq typhoon plot and save it as a PDF.

    Plots the time-0 data on chromosome 2 between positions 124380 and 125380
    with a legend panel beneath it, and writes the figure to
    ``<save_dir>/example_mnase_seq.pdf``. ``plotter.linewidth`` is set to 1
    while drawing and to 2.5 afterwards; the global settings are re-applied
    with their defaults at the end.
    """
    from src.chromatin import filter_mnase

    apply_global_settings(linewidth=1.75)
    plotter.linewidth = 1

    chrom = 2
    start, stop = 124380, 125380
    span = (start, stop)

    data = filter_mnase(plotter.all_mnase_data, start, stop,
                        chrom=chrom, time=0)

    fig, (ax, leg_ax) = plt.subplots(2, 1, figsize=(5, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.95, 0.945])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.set_span_chrom(span, chrom)
    plotter.plot_typhoon_time(ax, data, 0, scale_z=True)

    ax.set_xlim(start, stop)

    # Major/minor ticks on both axes.
    ax.set_xticks(np.arange(start, stop, 500))
    ax.set_xticks(np.arange(start, stop, 100), minor=True)
    ax.set_yticks(np.arange(0, 250, 100))
    ax.set_yticks(np.arange(0, 250, 50), minor=True)
    ax.tick_params(axis='y', labelsize=11.5, zorder=20)

    ax.set_xlabel("Position (bp)", fontsize=16)
    ax.set_ylabel("Fragment length (bp)", fontsize=16, labelpad=7)

    draw_legend(leg_ax, span, 500)

    write_path = '%s/%s.pdf' % (save_dir, 'example_mnase_seq')
    plt.savefig(write_path, transparent=True)

    # Leave the plotter and the global settings in their follow-up state.
    plotter.linewidth = 2.5
    apply_global_settings()
def plot_gene_tfs_hm(tf, small_peaks, datastore):
    """Draw a per-gene heatmap of peak occupancy change and transcription
    for the genes that have a motif of ``tf``.

    The peak values are passed through ``difference``, multiplied by 100 and
    averaged per gene, then joined with the gene's transcription values.
    Rows are sorted by the ``'120.0_sm_occ'`` column, highest first.
    """
    from config import times

    # select orfs with the tf bound
    motifs = small_peaks.all_motifs
    selected_peaks = motifs[motifs.tf == tf]
    selected_orfs = selected_peaks.orf.values

    # load the transcription data
    xrate = datastore.transcript_rate_logfold[times].loc[selected_orfs]

    # load the linked peaks
    peaks = small_peaks.linked_peaks_normalized.loc[selected_peaks.peak.values]
    peaks_diff = difference(peaks)
    peaks_diff *= 100.0

    # Average the peaks that belong to the same gene.
    peak_to_orf = selected_peaks.set_index('peak')[['orf']]
    per_orf = peaks_diff.join(peak_to_orf).reset_index().groupby('orf').mean()

    combined = per_orf[times].join(xrate[times],
                                   rsuffix='_logfold_TPM',
                                   lsuffix='_sm_occ')
    plot_data = combined.reset_index().groupby('index').mean()
    plot_data = plot_data.sort_values('120.0_sm_occ', ascending=False)

    names = list(plot_data.join(small_peaks.all_orfs[['name']])['name'])

    apply_global_settings()

    plt.figure(figsize=(8, 18))
    plt.imshow(plot_data, aspect=20./len(plot_data), vmin=-10, vmax=10,
               cmap='RdBu_r')
    plt.yticks(np.arange(len(plot_data)), names)
    plt.xticks([3, 9], ['Bin occupancy', 'Transcription'])
def plot_antisense_vs_sense(antisense_logfold_TPM, sense_logfold_rate, time,
                            highlight=None):
    """Plot sense against antisense log2 fold-change for one time point.

    Parameters:
        antisense_logfold_TPM: antisense values; re-indexed to the rows of
            ``sense_logfold_rate`` before plotting.
        sense_logfold_rate: sense values.
        time: column to plot from both frames; also shown in the title.
        highlight: passed through to ``plot_distribution``. ``None`` (the
            default) is treated as an empty list.
    """
    # A fresh list per call avoids sharing a mutable default between calls.
    highlight = [] if highlight is None else highlight

    antisense_logfold_TPM = antisense_logfold_TPM.loc[sense_logfold_rate.index]

    apply_global_settings()

    sense_data = sense_logfold_rate[time]
    anti_data = antisense_logfold_TPM[time]

    ax = plot_distribution(
        sense_data, anti_data,
        "log$_2$ fold-change Sense transcription rate",
        "log$_2$ fold-change Antisense transcripts",
        highlight=highlight,
        xlim=(-8, 8), xstep=2,
        ylim=(-8, 8), ystep=2,
        pearson=False, aux_lw=1.5,
        plot_minor=False,
        title="Sense vs antisense\ntranscription, 0-%.0f min" % time)

    # Guide lines at -2 and +2 on both axes.
    for x in [-2, 2]:
        ax.axvline(x, linewidth=2, color='#505050', zorder=98)
        ax.axhline(x, linewidth=2, color='#505050', zorder=98)

    # NOTE(review): nesting reconstructed from flattened source. Placed after
    # the loop, so ``x`` is the last loop value (2) -- confirm whether this
    # extra line belongs inside the loop or is a leftover.
    ax.axvline(x, linestyle='solid', color='#505050', linewidth=2.5,
               zorder=98)
def plot_tf_summary(small_peaks, head=None, tail=None):
    """Draw one vertical bar per transcription factor from
    ``small_peaks.tf_mean_means``.

    With ``head`` or ``tail`` given, only that many rows are drawn in a small
    figure with per-TF tick labels; otherwise every row is drawn in a large
    figure and the first/last groups are shaded. Rows that appear in
    ``small_peaks.selected_high_tfs`` / ``selected_low_tfs`` are redrawn in
    red / blue and their tick labels are colored to match.
    """
    summ_dif = small_peaks.tf_mean_means
    if head is not None:
        summ_dif = summ_dif.tail(head)  # sorted descending
    elif tail is not None:
        summ_dif = summ_dif.head(tail)  # sorted descending

    summ_dif = summ_dif.reset_index().rename(columns={'index': 'name'})
    positions = summ_dif.index.values

    subset = head is not None or tail is not None
    lw = 7

    if subset:
        apply_global_settings()

        fig_size = (4, 4) if head is not None else (3, 4)
        fig, ax = plt.subplots(1, 1, figsize=fig_size)
        fig.tight_layout(rect=[0.15, 0.1, 0.99, 0.9])

        ax.set_xticks(positions)
        ax.set_xticklabels(summ_dif['name'].str.title(), rotation=90,
                           ha='center', va='top')
        ax.tick_params(axis='x', length=0, pad=4, labelsize=13.5)
        ax.set_yticks(np.arange(-0.6, 0.6, 0.1))

        if head is None:
            ax.set_ylabel("Log$_2$ fold-change\naverage occupancy",
                          fontsize=14)
        else:
            ax.set_yticks([])
    else:
        apply_global_settings(linewidth=4, titlepad=80)

        fig, ax = plt.subplots(1, 1, figsize=(16, 10))
        fig.tight_layout(rect=[0.1, 0.1, 0.99, 0.75])

        ax.set_title("Transcription factor binding\noccupancy dynamics, "
                     "0-120 min", fontsize=50)
        ax.tick_params(axis='y', length=10, pad=5, labelsize=22)
        ax.set_ylabel("Log$_2$ fold-change\nin average occupancy",
                      fontsize=30)
        ax.set_xticks([])
        ax.set_yticks(np.arange(-0.6, 0.6, 0.1))

    # NOTE(review): nesting reconstructed from flattened source; the x limits
    # are assumed to apply to both layouts -- confirm.
    ax.set_xlim(-0.75, len(positions)-1+0.75)

    plot_key = 'mean'

    if subset:
        # Faint guide line below zero for every bar.
        for pos in np.arange(0, len(summ_dif)):
            ax.plot([pos, pos], [-10, 0], lw=1.5, linestyle='solid',
                    color='#f9f9f9')

    # Grey bar for every row; highlighted rows are drawn over these below.
    for idx, row in summ_dif.iterrows():
        ax.plot([idx, idx], [0, row.loc[plot_key]], c='#c0c0c0', lw=lw,
                solid_capstyle='butt')

    def highlight(selected_names, palette, faint_level):
        # Redraw the rows named in ``selected_names`` with ``palette`` and
        # color their tick labels the same way.
        chosen = summ_dif[summ_dif['name'].isin(selected_names)]

        for idx, row in chosen.iterrows():
            if subset:
                ax.plot([idx, idx], [-10, 0], lw=1.5, linestyle='solid',
                        color=palette(faint_level))
            ax.plot([idx, idx], [0, row.loc[plot_key]], c=palette(), lw=lw,
                    solid_capstyle='butt')

        shown_names = chosen['name'].str.title().values
        for ticklabel in ax.get_xticklabels():
            if ticklabel.get_text() in shown_names:
                ticklabel.set_color(palette())

    # high
    highlight(small_peaks.selected_high_tfs.index, red, 0.075)

    # low
    highlight(small_peaks.selected_low_tfs.index, blue, 0.1)

    if not subset:
        high_n, low_n = small_peaks.view_high, small_peaks.view_low
        plot_rect(ax, -0.5, -1, low_n, 2, color='#f0f0f0', zorder=0)
        plot_rect(ax, len(summ_dif)-high_n+.5, -1, 20, 2, color='#f0f0f0',
                  zorder=0)

    ax.set_ylim(-0.25, 0.25)
    ax.axhline(0, linewidth=2, color='black')
def plot_tf_scatter(small_peaks, tf_name=None, tf_names=None, t0=0.0,
                    t1=120.0, no_annotations=False, labeled_peaks=None,
                    dpi=300):
    """Scatter promoter peak values at ``t0`` against ``t1``.

    Without any TF name, all peaks are drawn in grey and (unless
    ``no_annotations``) the peaks returned by ``get_threshold_peaks`` are
    marked in red/blue. With ``tf_name`` or ``tf_names``, the peaks whose
    motif belongs to each named TF are overlaid with their own marker and
    color. Rows of ``labeled_peaks`` that match plotted peaks are annotated
    with their ``name``.

    Returns:
        ``(fig, ax)``.
    """
    apply_global_settings(dpi=dpi)

    linked_peaks = small_peaks.linked_peaks_normalized
    all_motifs = small_peaks.all_motifs
    plot_data = linked_peaks.loc[small_peaks.prom_peaks['name']].copy()

    fig, ax = plt.subplots(1, 1, figsize=(6.5, 6.5))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    x = plot_data[t0]
    y = plot_data[t1]

    # Dashed y = x reference line between 0 and 1.
    slope, intercept = (1, 0)
    diag_x = np.array([0, 1])
    ax.plot(diag_x, diag_x*slope + intercept, c='gray', linestyle='dashed',
            linewidth=1)

    if tf_name is not None and tf_names is None:
        tf_names = [tf_name]

    if tf_names is None:
        ax.scatter(x, y, s=1, c='#b0b0b0')

        if not no_annotations:
            high_peaks, low_peaks = get_threshold_peaks(small_peaks,
                                                        plot_data, t0, t1)
            is_high = plot_data.index.isin(high_peaks)
            is_low = plot_data.index.isin(low_peaks)

            sc1 = ax.scatter(plot_data[is_high][t0], plot_data[is_high][t1],
                             s=20, color=red(), marker='D', linewidth=1,
                             facecolor='none',)
            sc2 = ax.scatter(plot_data[is_low][t0], plot_data[is_low][t1],
                             s=20, color=blue(), marker='o', linewidth=1,
                             facecolor='none')

            plt.legend([sc1, sc2],
                       ['Increased, N=%d' % len(high_peaks),
                        'Decreased, N=%d' % len(low_peaks)])

            # NOTE(review): nesting reconstructed from flattened source;
            # assumed to be part of the annotations -- confirm.
            plot_threshold_line(ax, 1)
    else:
        ax.scatter(x, y, s=1, c='#d0d0d0')

        markers = ['o', 'x']
        colors = [parula()(0.5), parula()(0.0)]
        selected_sc = []
        labels = []

        for slot, cur_tf in enumerate(tf_names):
            tf_peak_ids = all_motifs[all_motifs.tf == cur_tf].peak
            sel_peaks = linked_peaks.loc[tf_peak_ids]

            sc = ax.scatter(sel_peaks[t0], sel_peaks[t1], color=colors[slot],
                            marker=markers[slot])
            selected_sc.append(sc)
            labels.append("%s, N=%d" % (cur_tf.title(), len(sel_peaks)))

        plt.legend(selected_sc, labels)
        plot_threshold_line(ax, small_peaks.fc_threshold)

    if labeled_peaks is not None:
        labeled_peaks = plot_data.join(labeled_peaks, how='inner')
        for idx, p in labeled_peaks.iterrows():
            # Label sits slightly above the point.
            ax.text(p.loc[t0], p.loc[t1]+0.005, p['name'],
                    ha='center', va='center', fontsize=13,
                    fontdict={'style':'italic'})

    ax.set_xlim(0., 0.12)
    ax.set_ylim(0., 0.12)
    ax.set_xlabel('Peak occupancy, 0 min')
    ax.set_ylabel('Peak occupancy, %s min' % (str(t1)))

    if tf_names is None:
        ax.set_title("Change in promoter small fragment\npeaks, 0-%s min, N=%d"
                     % (str(t1), len(x)), fontsize=20)
    else:
        tf_names = [tf.title() for tf in tf_names]
        ax.set_title("%s change in promoter small fragment\npeaks, 0-%.0f min"
                     % ("/".join(tf_names), t1), fontsize=20)

    return fig, ax
def plot_entropy_example(plotter, orf, plot_span, title):
    """Draw a small typhoon plot for ``orf`` at 120 min with its entropy.

    MNase data within 1000 bp of ``orf.TSS`` is re-centred on the TSS, turned
    into counts and cross-correlated with the kernels. The entropy is computed
    from the ``'triple'`` cross correlation over the 150 positions starting at
    ``plot_span[0]`` and shown in the title as ``"<title>\\n<value> bits"``.
    """
    from src.chromatin import filter_mnase
    from src.utils import get_orf
    from src.reference_data import all_orfs_TSS_PAS
    import matplotlib.pyplot as plt
    from src.kernel_fitter import compute_triple_kernel
    from src.cross_correlation_kernel import MNaseSeqDensityKernel
    from src.transformations import exhaustive_counts
    from src.cross_correlation import compute_cross_correlation_metrics
    from src.entropy import calc_entropy
    from src.plot_utils import apply_global_settings

    half_window = 1000
    span = (orf.TSS - 1000, orf.TSS + 1000)

    data = filter_mnase(plotter.all_mnase_data, span[0], span[1],
                        chrom=orf.chr, time=120)
    data['orf_name'] = orf.name
    # Positions become relative to the TSS.
    data.mid = data.mid - orf.TSS

    nuc_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    sm_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    triple_kernel = compute_triple_kernel(nuc_kernel)

    cur_wide_counts_df = exhaustive_counts(
        (-half_window, half_window), (0, 250), 'mid', 'length',
        parent_keys=['orf_name', 'time'],
        data=data, returns='wide', log=False)

    cur_cc = compute_cross_correlation_metrics(
        cur_wide_counts_df, nuc_kernel, sm_kernel, triple_kernel,
        times=[120.0])

    triple_cc = cur_cc.loc['triple'].loc[orf.name].loc[120]

    apply_global_settings()

    entropy_window = np.arange(plot_span[0], plot_span[0] + 150)
    value = calc_entropy(triple_cc[entropy_window].values)

    fig, ax = plt.subplots(1, 1, figsize=(1.5, 2.5))
    fig.tight_layout(rect=[0.0, 0.0, 1, 0.8])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.set_span_chrom(plot_span, orf.chr)
    plotter.plot_typhoon_time(ax, data, 120, scale_z=True)
    ax.set_xlim(*plot_span)

    # No ticks of any kind on either axis.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xticks([], minor=True)
    ax.set_yticks([], minor=True)
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, length=0, pad=0)

    cc_positions = triple_cc.index.values.astype(int)
    cc_scores = triple_cc.values.astype(float)
    ax.fill_between(cc_positions, cc_scores, color='#28a098')

    ax.set_title("%s\n%.1f bits" % (title, value))
def draw_example_rna_seq(plotter, save_dir):
    """Draw the example RNA-seq figure and save it as a PDF.

    Three stacked panels are produced for chromosome 6, positions
    252000-255500: gene annotations for RPN12 and HXK1, the RNA-seq plot, and
    a hand-built legend with one colored line per strand and time point plus
    a 500 nt scale bar. The figure is written to
    ``<save_dir>/example_rna_seq.pdf`` and the global settings are re-applied
    with their defaults afterwards.
    """
    from src.rna_seq_plotter import get_strand_colors

    apply_global_settings(linewidth=2.5)

    span = 252000, 255500
    chrom = 6

    rna_plotter = plotter.rna_seq_plotter
    orf_plotter = plotter.orfs_plotter
    orfs = plotter.orfs

    rna_plotter.set_span_chrom(span, chrom)
    orf_plotter.set_span_chrom(span, chrom)

    fig = plt.figure(figsize=(7, 6))

    grid_size = (4, 4)
    orf_ax = plt.subplot2grid(grid_size, (0, 0), colspan=4, rowspan=1)
    ax = plt.subplot2grid(grid_size, (1, 0), colspan=4, rowspan=1)
    leg_ax = plt.subplot2grid(grid_size, (2, 0), colspan=4, rowspan=2)

    fig.tight_layout(rect=[0.05, 0.03, 0.95, 0.945])
    plt.subplots_adjust(hspace=0.25, wspace=0.5)

    custom_orfs = orfs[orfs.name.isin(['RPN12', 'HXK1'])]
    custom_orfs = custom_orfs.reset_index(drop=True)
    custom_orfs['orf_name'] = ''

    orf_plotter.plot_orf_annotations(orf_ax, orf_classes=['Verified'],
                                     custom_orfs=custom_orfs,
                                     should_auto_offset=False)
    rna_plotter.plot(ax=ax)

    orf_ax.set_ylim(-60, 60)
    ax.set_xlabel('Position (bp)', fontsize=24)

    # Legend geometry, in the legend axes' data units.
    offset = 390
    column_spacing = 750
    line_len = 400
    strand_spacing = 1800
    txt_space = 50
    y_start = 2
    rows_per_column = 3

    color_maps = list(reversed(get_strand_colors()))
    times = rna_plotter.times
    strands = 'Watson', 'Crick'
    label_font = {'fontname': 'Open Sans', 'fontweight': 'regular'}

    ax.tick_params(axis='y', labelsize=16, zorder=20)

    for strand_i, strand_name in enumerate(strands):
        strand_left = offset + strand_i * strand_spacing

        for column in range(2):
            for row in range(rows_per_column):
                # Time points fill the first column top-down, then the second.
                time_i = column * rows_per_column + row
                y_plot = y_start - row
                color = color_maps[strand_i][time_i]

                x_start = (offset + strand_i * strand_spacing
                           + column * column_spacing)
                x_end = (offset + line_len + strand_i * strand_spacing
                         + column * column_spacing)

                leg_ax.plot([x_start, x_end], [y_plot, y_plot], lw=4,
                            color=color)
                leg_ax.text(x_start - txt_space, y_plot,
                            "%s'" % str(times[time_i]),
                            ha='right', va='center',
                            fontdict=label_font, fontsize=14)

        # Strand heading above its block of lines.
        leg_ax.text(strand_left + strand_spacing / 4., 2.8, strand_name,
                    ha='center', va='bottom',
                    fontdict=label_font, fontsize=16)

    span_width = span[1] - span[0]

    # Scale bar with its label drawn inside.
    leg_ax.plot([20, 520], [6, 6], lw=24, color='#707070',
                solid_capstyle='butt')
    leg_ax.text(50, 6, '500 nt', ha='left', va='center', color='white',
                fontdict=label_font, fontsize=16)

    leg_ax.set_xlim(0, span_width)
    leg_ax.set_ylim(-3, 7)
    leg_ax.axis('off')

    plt.savefig('%s/example_rna_seq.pdf' % save_dir, transparent=True)

    apply_global_settings()
def plot_antisense_calling(gene_name, rna_seq_pileup):
    """Plot the smoothed strand pileups around a gene and its called
    antisense boundaries.

    The upper panel shows gene annotations for the region 1000 bp either side
    of the gene's transcript. The lower panel shows the smoothed
    ``log2(pileup + 1)`` of the ``'+'`` strand above zero and of the ``'-'``
    strand mirrored below zero. If the gene has an entry in the computed
    antisense boundaries, its start and stop are marked with red lines.

    Parameters:
        gene_name: gene to look up via ``get_orf``.
        rna_seq_pileup: pileup data handed to ``filter_rna_seq_pileup``.
    """
    from src.rna_seq_plotter import get_smoothing_kernel
    from src.plot_utils import apply_global_settings
    from src.utils import get_orf
    from src.transcription import filter_rna_seq_pileup
    from src.transcript_boundaries import load_park_boundaries
    from src.plot_orf_annotations import ORFAnnotationPlotter
    from config import paper_orfs
    from src.reference_data import read_sgd_orfs, read_park_TSS_PAS
    from src.datasets import read_orfs_data

    all_orfs = read_sgd_orfs()
    all_orfs = all_orfs.join(read_park_TSS_PAS()[['TSS', 'PAS']])
    orfs_plotter = ORFAnnotationPlotter(orfs=all_orfs)

    antisense_boundaries = read_orfs_data(
        '%s/antisense_boundaries_computed.csv' % rna_dir)

    park_boundaries = load_park_boundaries()
    park_boundaries = park_boundaries.join(paper_orfs[['name']])

    gene = get_orf(gene_name, park_boundaries)

    search_2 = 1000
    plot_span = gene.transcript_start-search_2, gene.transcript_stop+search_2
    gene_rna_seq = filter_rna_seq_pileup(rna_seq_pileup, plot_span[0],
                                         plot_span[1], gene.chr)

    apply_global_settings(30)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 5.))
    fig.tight_layout(rect=[0.1, 0, 1, 0.85])

    orfs_plotter.set_span_chrom(plot_span, gene.chr)
    orfs_plotter.plot_orf_annotations(ax1)

    # NOTE(review): "sense" is always the '+' strand here, independent of
    # gene.strand -- confirm that this labelling is intended.
    sense_data = gene_rna_seq[gene_rna_seq.strand == '+']
    antisense_data = gene_rna_seq[gene_rna_seq.strand == '-']

    sense_data = np.log2(sense_data.groupby('position').sum()+1).pileup
    antisense_data = np.log2(
        antisense_data.groupby('position').sum()+1).pileup

    smooth_kernel = get_smoothing_kernel(100, 20)

    # Both curves are drawn against the '+' strand positions.
    x = sense_data.index
    sense_data = np.convolve(sense_data, smooth_kernel, mode='same')
    antisense_data = np.convolve(antisense_data, smooth_kernel, mode='same')

    ax2.plot(x, sense_data, color=plt.get_cmap('Blues')(0.5))
    ax2.plot(x, -antisense_data, color=plt.get_cmap('Reds')(0.5))
    ax2.set_xlim(*plot_span)
    ax2.set_ylim(-15, 15)
    ax2.axhline(0, color='black')

    if gene.name in antisense_boundaries.index:
        anti_gene = antisense_boundaries.loc[gene.name]

        # Parenthesised so the conditional picks between two (low, high)
        # pairs; unparenthesised it built a 3-tuple.
        y_plot = (0, 20) if gene.strand == '-' else (-20, 0)

        for boundary in (anti_gene.start, anti_gene.stop):
            ax2.plot([boundary, boundary], [y_plot[0], y_plot[1]],
                     color='red', linewidth=2.5, solid_capstyle='butt')

    ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 500))
    ax2.set_xticklabels([])
    ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 100), minor=True)

    ax2.tick_params(labelsize=14)
    ax2.set_ylabel("Sum log$_2$ (pileup+1)", fontsize=15)
    ax2.set_xlabel("Position (bp)", fontsize=15)
    ax1.set_title("Calling antisense transcripts", fontsize=26)

    # NOTE(review): fixed positions that do not depend on ``gene_name``;
    # kept as in the original -- confirm whether they are still wanted.
    ax2.axvline(383344)
    ax2.axvline(384114)
def plot_bar_counts(antisense_TPM_logfold, transcript_rate_logfold,
                    time=120.0):
    """
    Plot the number of genes that lie in each antisense sense bucket for
    the given time

    Genes are bucketed on both the sense and the antisense value into
    decreased (< -2), unchanged ([-2, 2]) and increased (> 2), giving nine
    bars grouped by sense bucket. Bars taller than 1250 are clipped to 1275;
    every bar is labelled with its true count.

    Parameters:
        antisense_TPM_logfold: antisense values, one column per time.
        transcript_rate_logfold: sense values, one column per time.
        time: time column to use; also shown in the title.
    """
    data = antisense_TPM_logfold.join(transcript_rate_logfold,
                                      lsuffix='_antisense_x_logfold',
                                      rsuffix='_xrate', how='inner')

    apply_global_settings(titlepad=20)

    time_str = str(time)
    sense_k = '%s_xrate' % time_str
    anti_k = '%s_antisense_x_logfold' % time_str

    # to calculate inclusive/exclusive values correctly
    epsilon = 1e-10

    spans = [
        (float('-inf'), -2-epsilon),
        (-2-epsilon, 2+epsilon),
        (2+epsilon, float('inf')),
    ]
    names = [
        'Decreased, <-2',
        'Unchanged, [-2, 2]',
        'Increased, >2'
    ]

    blues = plt.get_cmap('Blues')
    reds = plt.get_cmap('Reds')
    grays = plt.get_cmap('Greys')

    colors = [
        blues(0.35), blues(0.5), blues(0.65),
        grays(0.35), grays(0.5), grays(0.65),
        reds(0.35), reds(0.5), reds(0.65)
    ]
    facecolors = [
        blues(0.35), blues(0.25), blues(0.65),
        grays(0.35), grays(0.25), grays(0.65),
        reds(0.35), reds(0.25), reds(0.65)
    ]

    fig, ax = plt.subplots(figsize=(7, 4.5))
    fig.tight_layout(rect=[0.1, 0.1, 0.75, 0.85])

    ticks = []
    for sense_i, span_sense in enumerate(spans):
        for anti_i, span_antisense in enumerate(spans):
            bar_i = sense_i * len(spans) + anti_i

            selected = data[(data[sense_k] >= span_sense[0]) &
                            (data[sense_k] < span_sense[1]) &
                            (data[anti_k] >= span_antisense[0]) &
                            (data[anti_k] < span_antisense[1])]

            # Only the middle group contributes legend entries so each
            # antisense bucket is listed once.
            label = names[anti_i] if sense_i == 1 else None

            x = sense_i*3 + anti_i*0.75 - 0.75
            y = len(selected)

            # Clip very tall bars; the label still shows the real count.
            plot_y = 1275 if y > 1250 else y

            # NOTE(review): nesting reconstructed from flattened source;
            # assumed that every bar gets a count label -- confirm.
            ax.text(x, plot_y+20, int(y), ha='center')

            color = colors[bar_i]
            ax.bar(x, plot_y, color=color, label=label, width=.5,
                   facecolor=facecolors[bar_i], linewidth=2,
                   edgecolor=color, hatch='\\\\',
                   )

            # The middle bar of each group carries the group's x tick.
            if anti_i == 1:
                ticks.append(x)

    ax.set_xticks(ticks)
    ax.set_xticklabels(names, rotation=0, ha='center')
    ax.tick_params(axis='x', length=0, pad=10)

    ax.set_ylim(0, 1400)
    ax.set_ylabel('# of genes', labelpad=0)
    ax.set_xlabel('Sense transcription', labelpad=10)

    yticks = np.arange(0, 1400, 200)
    ax.set_yticks(yticks)
    yticklabels = [str(y) for y in yticks]
    # The top tick stands for "more than" because tall bars are clipped.
    yticklabels = yticklabels[:-1] + [('>' + yticklabels[-1])]
    ax.set_yticklabels(yticklabels)

    ax.legend(loc=2, title='Antisense transcripts',
              bbox_to_anchor=(1.0, 1.0), frameon=False)

    # Separators between the three sense groups.
    for i in range(2):
        ax.axvline(i*3+1.5, color='#F0F0F0', lw=2)

    # Title follows ``time`` (renders "0-120 min" for the default).
    ax.set_title("Frequency of sense and\nantisense transcription, "
                 "0-%.0f min" % time, fontsize=18)

    # Axis-break mark at fixed coordinates (a white gap between two grey
    # strokes).
    ax.plot([2.67, 3.305], [1115, 1185], lw=4, color='white')
    ax.plot([2.67, 3.305], [1100, 1170], lw=2, color=grays(0.5))
    ax.plot([2.67, 3.305], [1130, 1200], lw=2, color=grays(0.5))
def plot_tf_heatmap(small_peaks, lim=5, is_high=True):
    """
    Draw a three-panel heatmap for a selection of transcription factors.

    Each row is one transcription factor. The panels, separated by a blank
    column, show the factor's own transcription rate, its scaled binding
    occupancy and the mean transcription rate of its regulon. With
    ``is_high`` the last ``small_peaks.view_high`` entries of
    ``small_peaks.tf_mean_means`` are shown, otherwise the first
    ``small_peaks.view_low`` entries. Tick labels of factors listed in the
    matching ``selected_high_tfs`` / ``selected_low_tfs`` index are coloured.

    ``lim`` is accepted but not used by this function.
    """
    apply_global_settings(titlepad=15)

    fig, ax = plt.subplots(1, 1, figsize=(5, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    datastore = small_peaks.datastore

    if is_high:
        shown_tfs = small_peaks.tf_mean_means.tail(small_peaks.view_high)
        highlighted = small_peaks.selected_high_tfs.index.values
    else:
        shown_tfs = small_peaks.tf_mean_means.head(small_peaks.view_low)
        highlighted = small_peaks.selected_low_tfs.index.values

    xrates_all = datastore.transcript_rate_logfold

    # NOTE(review): read but never used below
    all_motifs = small_peaks.all_motifs

    # select which TFs to show, in reversed order
    tf_set = small_peaks.tf_set
    in_selection = tf_set.index.isin(shown_tfs.index)
    selected = tf_set[in_selection]
    selected = selected[::-1]

    # collect the regulon for the TF
    regulon_xrate = small_peaks.mean_regulon_xrate.loc[selected.index.values]

    # transcription rate of the TF
    tf_xrate = xrates_all.loc[selected.orf_name]

    # average occupancy of peaks (scale to similar values to xrates)
    occupancy = small_peaks.tf_means_df.loc[selected.index]*small_peaks.bin_scale

    # single zero columns act as gaps between the three panels
    gap = np.zeros((len(occupancy), 1))
    panels = [tf_xrate.values, gap, occupancy.values, gap,
              regulon_xrate.values]
    data = np.concatenate(panels, axis=1)
    n_rows, n_cols = data.shape[0], data.shape[1]

    ax.imshow(data,
              vmin=-small_peaks.im_scale,
              vmax=small_peaks.im_scale,
              cmap='RdBu_r',
              origin='lower',
              extent=[0, n_cols, 0, n_rows],
              aspect=15./n_cols)
    ax.set_xlim(-0.1, n_cols+0.1)
    ax.set_ylim(-0.1, n_rows+0.1)
    hide_spines(ax)

    tf_labels = [name.title() for name in selected.index]
    ax.set_yticks(np.arange(len(selected))+0.5)
    ax.set_yticklabels(tf_labels)

    ax.set_xticks([3, 10, 17])
    ax.set_xticklabels(['Transcription', 'Binding\noccupancy',
                        'Regulon\ntranscription'])

    ax.tick_params(axis='y', length=0, pad=2, labelsize=10)
    ax.tick_params(axis='x', length=0, pad=4, labelsize=11.5)

    direction = "increased" if is_high else "decreased"
    ax.set_title("Transcription factors\nwith %s occupancy" % direction,
                 fontsize=16)

    # paint over the gap columns, then frame each six-column panel
    for gap_x in [6, 13]:
        plot_rect(ax, gap_x, 0, 1, len(data), color='white', fill=True,
                  joinstyle='miter')
    for panel_x in [0, 7, 14]:
        plot_rect(ax, panel_x, 0, 6, len(data), edgecolor='black', lw=2,
                  fill=False, joinstyle='miter')

    highlight_color = red() if is_high else blue()

    # labels were title-cased above; compare in upper case against the index
    for tick_label in ax.get_yticklabels():
        if tick_label.get_text().upper() in highlighted:
            tick_label.set_color(highlight_color)
def plot_bar(self, activated_genes=True, title=None):
    """
    Draw grouped horizontal bars for the top rows of ``self.terms_res``.

    Rows whose three score columns sum to zero are dropped and the eight
    rows with the largest row-wise maximum are kept. Each remaining row gets
    three bars (small fragments, nucleosome disorganization, combined),
    each annotated with its value as a power of ten. ``activated_genes``
    selects the red or blue palette, the x tick range and the default title;
    an explicit ``title`` overrides the default.
    """
    direction = 'increase' if activated_genes else 'decrease'
    if title is None:
        title = ("Greatest %s in various\nchromatin scores, N=300" %
                 direction)

    plot_utils.apply_global_settings(30)

    score_columns = ['Promoter small fragments',
                     'Nucleosome disorganization',
                     'Combined chromatin']
    table = self.terms_res[score_columns]
    table = table[table.sum(axis=1) > 0]

    # keep the eight rows with the largest best score, smallest first
    row_order = table.max(axis=1).sort_values(ascending=True).index
    table = table.loc[row_order].tail(8)

    small_frag_vals = table['Promoter small fragments'].values
    disorg_vals = table['Nucleosome disorganization'].values
    combined_vals = table['Combined chromatin'].values

    centers = np.arange(len(small_frag_vals))
    bar_height = 0.225
    gap = 0.05

    fig, ax = plt.subplots(figsize=(11, 14))
    fig.tight_layout(rect=[0.35, 0.15, 0.90, 0.87])
    fig.patch.set_alpha(0.0)

    if activated_genes:
        cmap = plt.get_cmap('Reds')
        small_frag_color = cmap(0.5)
        combined_color = cmap(0.25)
        disorg_color = cmap(0.8)
        combined_edge = cmap(0.6)
    else:
        cmap = plt.get_cmap('Blues')
        small_frag_color = cmap(0.5)
        combined_color = cmap(0.3)
        disorg_color = cmap(0.9)
        combined_edge = cmap(0.7)

    # three bars per row: above, on and below the row centre
    small_frag_y = centers + (bar_height+gap)
    disorg_y = centers
    combined_y = centers - (bar_height+gap)

    ax.barh(small_frag_y, small_frag_vals, bar_height,
            label='Small fragment occupancy',
            color=small_frag_color, alpha=1)

    # drawn one bar at a time; only the first bar carries the legend label
    for idx, (y_pos, width) in enumerate(zip(disorg_y, disorg_vals)):
        ax.barh(y=dis_label_y(y_pos) if False else y_pos,
                width=width,
                height=bar_height,
                label='Nucleosome\ndisorganization' if idx == 0 else None,
                color=disorg_color)

    for idx, (y_pos, width) in enumerate(zip(combined_y, combined_vals)):
        ax.barh(y_pos, width, bar_height,
                label='Combined' if idx == 0 else None,
                color=combined_color,
                facecolor=combined_color,
                edgecolor=combined_edge,
                hatch='\\\\',
                alpha=1,
                linewidth=2)

    # determine scale to offset labels
    label_offset = table.max().max() / 100.

    bar_groups = ((small_frag_vals, small_frag_y),
                  (disorg_vals, disorg_y),
                  (combined_vals, combined_y))
    for vals, ys in bar_groups:
        for val, y_pos in zip(vals, ys):
            # skip labels for bars too short to be visible
            if val > label_offset:
                ax.text(val + label_offset, y_pos,
                        ("10$^{-%0.1f}$" % val),
                        va='center', fontsize=14,
                        fontdict={'family':'Open Sans'})

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.legend(loc=4, bbox_to_anchor=(0.5, -0.25), frameon=False, fontsize=18)
    ax.set_yticks(np.arange(len(table)))

    # thin separators between neighbouring rows
    for row in np.arange(1, len(small_frag_vals)):
        ax.axhline(y=(row-0.5), color='#D0D0D0', linewidth=1)

    # capitalise each term and wrap long ones over two or three lines
    wrapped_terms = []
    for raw_term in table.index.values:
        term = raw_term[0:1].upper() + raw_term[1:]
        if term == "Maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)":
            term = "Maturation of SSU-rRNA"
        words = term.split(' ')
        if len(term) > 60:
            wrapped = (' '.join(words[:3]) + '\n' +
                       ' '.join(words[3:7]) + '\n' +
                       ' '.join(words[7:]))
        elif len(term) > 30:
            wrapped = ' '.join(words[:2]) + '\n' + ' '.join(words[2:])
        else:
            wrapped = term
        wrapped_terms.append(wrapped)

    ax.set_yticklabels(wrapped_terms)
    plot_utils.format_ticks_font(ax)
    plot_utils.format_ticks_font(ax, which='y', fontsize=12)

    max_fdr = self.terms_res.max().max()
    if activated_genes:
        x_ticks = np.arange(0, 6, 1)
    else:
        x_ticks = np.arange(0, 100, 20)

    ax.set_xlim(0, round(max_fdr+15))
    ax.set_xticks(x_ticks)
    ax.set_title(title, fontsize=30)
    # tick labels are the negated tick positions
    ax.set_xticklabels(-x_ticks)
    ax.set_xlabel('log$_{10}$ FDR', fontsize=20)

    ax.tick_params(axis='y', labelsize=18, length=0, pad=20)
    ax.tick_params(axis='x', labelsize=16, pad=10)
def plot_figure_setup(self):
    """
    Setup figure for time series subplots with connecting plots between.

    The figure is a single-column grid. The first two grid rows hold the top
    axis; every further data axis takes two rows and is preceded by a
    one-row, hidden "tween" axis. The figure height in inches equals the
    number of grid rows.

    Reads ``self.times``, ``self.dpi``, ``self.figwidth``, ``self.show_rna``,
    ``self.show_orfs`` and ``self.span``; sets ``self.linewidth`` to 2.

    Returns:
        tuple: ``(fig, time_axes, tween_axes, orf_ax, rna_ax)``. ``orf_ax``
        is the top axis when ``self.show_orfs`` is set, else ``None``;
        ``rna_ax`` is the second data axis when ``self.show_rna`` is set,
        else ``None``.
    """
    # configuration
    times = self.times
    n = len(times)
    titlepad = 10
    self.linewidth = 2
    plot_utils.apply_global_settings(titlepad=titlepad,
                                     linewidth=self.linewidth,
                                     dpi=self.dpi)

    figwidth = self.figwidth
    show_rna = self.show_rna
    show_orfs = self.show_orfs
    plot_span = self.span

    # one extra data row for each of the optional ORF / RNA tracks
    add_rows = int(show_orfs + show_rna)
    nrows = n + add_rows
    grid_size = (nrows * 3 - 1, 1)

    # set fig height to the grid height
    figsize = (figwidth, grid_size[0])

    fig = plt.figure(figsize=figsize)

    ax0 = plt.subplot2grid(grid_size, (0, 0), colspan=4, rowspan=2)
    ax0.set_xlim(*plot_span)

    time_axes = []
    tween_axes = []
    rna_ax, orf_ax = None, None
    # only bound when an RNA track exists; its tween row hosts the legend
    leg_ax = None

    if show_orfs:
        orf_ax = ax0
    else:
        time_axes.append(ax0)

    for i in range(0, nrows - 1):
        y = 2 + i * 3
        tween_ax = plt.subplot2grid(grid_size, (y, 0), colspan=4, rowspan=1,
                                    zorder=0)
        time_ax = plt.subplot2grid(grid_size, (y + 1, 0), colspan=4,
                                   rowspan=2, zorder=0.1)

        tween_ax.set_xlim(plot_span[0], plot_span[1])
        tween_ax.set_ylim(0, 10)
        time_ax.set_xlim(plot_span[0], plot_span[1])
        time_ax.set_ylim(0, 250)

        tween_ax.axis('off')
        tween_ax.xaxis.set_visible(False)

        if i == 0 and show_rna:
            rna_ax = time_ax
            leg_ax = tween_ax
        else:
            # between time subplots
            if i > 1 or not show_rna:
                tween_axes.append(tween_ax)

            # time subplot
            time_axes.append(time_ax)

    # Without an RNA track no legend axis was reserved; skipping the legend
    # avoids the NameError the unconditional call used to raise.
    if leg_ax is not None:
        draw_legend(leg_ax, plot_span)

    # more padding for title for smaller plots
    if n == 3:
        fig.tight_layout(rect=[0.075, 0.1, 0.95, 0.93])
    else:
        fig.tight_layout(rect=[0.075, 0.1, 0.95, 0.945])

    plt.subplots_adjust(hspace=0.0, wspace=0.5)
    time_axes[-1].set_xlabel("Position (bp)", fontsize=24)

    if len(time_axes) > 2:
        # floor division keeps the list index an int under Python 3
        label_idx = max(len(time_axes) // 2 - 1, 1)
        time_axes[label_idx].set_ylabel("Fragment length (bp)", fontsize=24,
                                        labelpad=10)

    return fig, time_axes, tween_axes, orf_ax, rna_ax
def plot_distribution(x_data, y_data, xlabel, ylabel, highlight=[],
                      title=None, xlim=(-2.5, 2.5), ylim=(-6, 10),
                      xstep=2, ystep=2, pearson=True,
                      ha='right', va='bottom', plot_aux='cross',
                      groups={}, highlight_format={},
                      aux_lw=1.5, s=5, markersize=53, ax=None,
                      text_offset=None, tight_layout=None, dpi=300,
                      bw=None, plot_lr=False, titlesize=18,
                      xticks=None, yticks=None, plot_minor=True):
    """
    Density-coloured scatter of ``y_data`` against ``x_data`` with optional
    marginal densities, highlighted genes, groups and a regression line.

    When ``ax`` is not given a new figure is created with the scatter axis
    plus a top and a right marginal density axis; when ``ax`` is given only
    the scatter is drawn on it.

    Args:
        x_data, y_data: indexed value containers (``.index``, ``.loc`` and
            ``.values`` are used) -- presumably pandas Series keyed by ORF.
        xlabel, ylabel: axis labels.
        highlight: gene names to mark and annotate; each is resolved with
            ``get_orf_name`` and skipped when missing from ``x_data.index``
            or outside ``xlim`` / ``ylim``. Never mutated here.
        title: title prefix; correlation / R^2 text is appended to it.
        xlim, ylim: axis limits; also the ranges of the marginal densities.
        xstep, ystep: major tick steps used when ``xticks`` / ``yticks`` are
            not given; minor ticks every 1 are added when the step is < 5.
        pearson: append Pearson's r and p-value to the title.
        ha, va: default text alignment of highlight labels.
        plot_aux: ``'cross'`` (lines through 0), ``'diag'`` (y = x line) or
            ``'both'``; any other value draws neither.
        groups: ``{name: {'orfs': ..., 'color': ...}}``; each group is
            drawn as hollow diamonds and added to the legend. Never mutated.
        highlight_format: per-gene overrides with optional keys ``marker``,
            ``color``, ``filled``, ``ha`` and ``va``. Never mutated.
        aux_lw: line width of the auxiliary lines.
        s: marker size passed to ``plot_density_scatter``.
        markersize: marker size of highlighted genes.
        ax: existing axis to draw on, or ``None`` to create the figure.
        text_offset: label offset; defaults to 0.5% of the x range.
        tight_layout: optional ``rect`` passed to ``Figure.tight_layout``.
        dpi: forwarded to ``apply_global_settings``.
        bw: bandwidth passed to ``plot_density_scatter``; defaults to
            ``[0.15, 0.15]`` only when this function creates the axes.
        plot_lr: draw a dashed linear fit and append R^2 to the title.
        titlesize: title font size.
        xticks, yticks: ``(start, stop, step)`` for ``np.arange``.
        plot_minor: enable minor ticks.

    Returns:
        The scatter axis.
    """
    apply_global_settings(10, dpi=dpi)

    plot_default_ax = ax is None
    if plot_default_ax:
        fig = plt.figure(figsize=(6.5, 6.5))
        fig.patch.set_alpha(0.0)

        grid_len = 9
        grid_size = (grid_len, grid_len)

        ax = plt.subplot2grid(grid_size, (1, 0),
                              colspan=grid_len - 1, rowspan=grid_len - 1)
        tax = plt.subplot2grid(grid_size, (0, 0),
                               colspan=grid_len - 1, rowspan=1)
        rax = plt.subplot2grid(grid_size, (1, grid_len - 1),
                               colspan=1, rowspan=grid_len - 1)
    else:
        # Use the caller's figure so the tight_layout option below works
        # (previously `fig` was unbound here and raised NameError).
        fig = ax.figure
        tax = None
        rax = None

    if len(groups) > 0 and plot_default_ax:
        fig.tight_layout(rect=[0.14, 0.15, 0.9, 0.9])

    if tight_layout is not None:
        fig.tight_layout(rect=tight_layout)

    if plot_default_ax:
        plt.subplots_adjust(hspace=0.05, wspace=0.04)

        if bw is None:
            bw = [0.15, 0.15]

        xspan_diff = xlim[1] - xlim[0]
        # The y marginal must be scaled by the y range; this used to be
        # computed from xlim, giving the wrong step and bandwidth.
        yspan_diff = ylim[1] - ylim[0]

        # top marginal: density of x
        y = plot_density(x_data, ax=tax,
                         arange=(xlim[0], xlim[1], xspan_diff * 1e-3),
                         bw=xspan_diff * 1e-2, fill=True, color='#a0a0a0')
        y_max = np.max(y)
        tax.set_xlim(*xlim)
        tax.set_ylim(y_max * -1e-1, y_max * 1.5)

        # right marginal: density of y, drawn sideways
        x = plot_density(y_data, ax=rax,
                         arange=(ylim[0], ylim[1], yspan_diff * 1e-3),
                         bw=yspan_diff * 1e-2, flip=True, fill=True,
                         color='#a0a0a0')
        x_max = np.max(x)
        rax.set_ylim(*ylim)
        rax.set_xlim(x_max * -1e-1, x_max * 1.5)

        hide_spines(rax)
        hide_spines(tax)

    plot_density_scatter(x_data, y_data, s=s, bw=bw, ax=ax, cmap=parula(),
                         alpha=1., zorder=20)

    # translucent white veil above the scatter, below the annotations
    plot_rect(ax, xlim[0], ylim[0], xlim[1] - xlim[0], ylim[1] - ylim[0],
              'white', fill_alpha=0.5, zorder=90)

    for group_name, group in groups.items():
        group_orfs = group['orfs']
        group_x = x_data[x_data.index.isin(group_orfs)]
        group_y = y_data[y_data.index.isin(group_orfs)]
        ax.scatter(group_x, group_y, s=53, facecolor='none',
                   color=group['color'], zorder=98, marker='D',
                   linewidth=1.5, label=group_name, rasterized=True)

    for gene_name in highlight:
        orf_name = get_orf_name(gene_name)
        if orf_name not in x_data.index:
            continue

        selected_x = x_data.loc[orf_name]
        selected_y = y_data.loc[orf_name]

        # skip genes that fall outside the visible window
        if selected_x > xlim[1] or selected_x < xlim[0]:
            continue
        if selected_y > ylim[1] or selected_y < ylim[0]:
            continue

        gene_fmt = highlight_format.get(gene_name, {})
        marker = gene_fmt.get('marker', 'D')
        color = gene_fmt.get('color', '#c43323')
        # the mere presence of the 'filled' key fills the marker
        facecolor = color if 'filled' in gene_fmt else 'none'

        ax.scatter(selected_x, selected_y, s=markersize,
                   facecolor=facecolor, color=color, zorder=98,
                   marker=marker, linewidth=1.5)

        if text_offset is None:
            text_offset = (xlim[1] - xlim[0]) * 5e-3

        cur_ha = gene_fmt.get('ha', ha)
        cur_va = gene_fmt.get('va', va)

        # push the label away from the marker on the anchored side
        x_off, y_off = text_offset, text_offset
        if cur_ha == 'right':
            x_off = -text_offset
        elif cur_ha == 'left':
            x_off = text_offset
        elif cur_ha == 'center':
            x_off = 0

        if cur_va == 'top':
            y_off = -text_offset
        elif cur_va == 'bottom':
            y_off = text_offset

        text = ax.text(selected_x + x_off, selected_y + y_off, gene_name,
                       fontdict={
                           'fontname': 'Open Sans',
                           'fontweight': 'regular',
                           'style': 'italic'
                       },
                       fontsize=12, ha=cur_ha, va=cur_va, zorder=99)
        # white halo keeps the label legible on top of the points
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground='white'),
            path_effects.Normal()
        ])

    if xticks is None:
        xticks = xlim[0], xlim[1] + xstep, xstep
    ax.set_xticks(np.arange(*xticks))

    if yticks is None:
        yticks = ylim[0], ylim[1] + ystep, ystep
    ax.set_yticks(np.arange(*yticks))

    if xstep < 5 and plot_minor:
        ax.set_xticks(np.arange(xticks[0], xticks[1], 1), minor=True)
    if ystep < 5 and plot_minor:
        ax.set_yticks(np.arange(yticks[0], yticks[1], 1), minor=True)

    ax.tick_params(axis='x', pad=5, labelsize=15)
    ax.tick_params(axis='y', pad=5, labelsize=15)

    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if len(groups) > 0:
        ax.legend(loc=1, bbox_to_anchor=(0.475, -0.2), frameon=False,
                  fontsize=14)

    if plot_aux == 'cross' or plot_aux == 'both':
        ax.axvline(0, linestyle='solid', color='#505050', linewidth=aux_lw,
                   zorder=97)
        ax.axhline(0, linestyle='solid', color='#505050', linewidth=aux_lw,
                   zorder=97)

    if plot_aux == 'diag' or plot_aux == 'both':
        # y = x line, extended past the x limits
        ax.plot([xlim[0] * 2, xlim[1] * 2], [xlim[0] * 2, xlim[1] * 2],
                linestyle='solid', color='#505050', linewidth=aux_lw,
                zorder=97)

    # NOTE(review): with title=None the "%s" below renders the literal
    # text "None" -- callers are expected to pass a title.
    if pearson:
        from src.math_utils import convert_to_latex_sci_not
        cor, pval = pearsonr(x_data, y_data)
        pval = convert_to_latex_sci_not(pval)
        title = ("%s\nPearson's r=%.2f, p=%s" % (title, cor, pval))

    if plot_lr:
        # plot linear regression
        reg, _coef, _data, _y_vals = get_linear_model_coef(x_data, y_data)

        # local names chosen so the marker-size parameter `s` is not rebound
        line_x = np.arange(-100, 100)
        line_y = reg.predict(line_x.reshape(len(line_x), 1))
        ax.plot(line_x, line_y, zorder=100, c='gray', linestyle='dashed',
                lw=1.5)

        from sklearn.metrics import r2_score
        true = y_data
        predicted = reg.predict(x_data.values.reshape(len(x_data), 1))
        r2 = r2_score(true, predicted)
        title = ("%s, $R^2$=%.2f" % (title, r2))

    # with the default layout the title sits on the top marginal axis
    if plot_default_ax:
        tax.set_title(title, fontsize=titlesize)
    else:
        ax.set_title(title, fontsize=titlesize)

    return ax
def plot_ends_heatmap(orf_0_nuc_mid_counts, orf_120_nuc_mid_counts,
                      orf_0_nuc_start_counts, orf_120_nuc_start_counts,
                      orf_0_nuc_stop_counts, orf_120_nuc_stop_counts,
                      head=None, tail=None):
    """
    Plot difference heatmaps (120 minus 0) of nucleosome fragment start,
    midpoint and stop counts, with comparison plots underneath.

    The top row holds three heatmaps titled Left (starts), Middle (mids)
    and Right (stops); each shows ``counts_120 - counts_0`` on a fixed
    -5..5 colour scale. The bottom row is filled by
    ``plot_ends_comparison``.

    Args:
        orf_0_nuc_*_counts, orf_120_nuc_*_counts: count tables for the two
            conditions; they must support subtraction, ``head`` and
            ``tail`` -- presumably pandas DataFrames with one row per ORF.
        head: if given, only the first ``head`` rows of each difference are
            shown and the title says "downstream".
        tail: used only when ``head`` is None; the last ``tail`` rows are
            shown and the title says "upstream".

    When neither ``head`` nor ``tail`` is given all rows are shown and a
    generic title without a count is used.
    """
    apply_global_settings(titlepad=10)

    mids = [orf_0_nuc_mid_counts, orf_120_nuc_mid_counts]
    starts = [orf_0_nuc_start_counts, orf_120_nuc_start_counts]
    ends = [orf_0_nuc_stop_counts, orf_120_nuc_stop_counts]

    nuc_groups = [starts, mids, ends]
    names = ['Left', 'Middle', 'Right']
    # x-window shift per panel
    origins = [-50, 0, 50]

    fig = plt.figure(figsize=(6, 5))
    grid_size = (3, 3)

    # top two grid rows: heatmaps; bottom row: comparison plots
    axs = [plt.subplot2grid(grid_size, (0, col), colspan=1, rowspan=2)
           for col in range(3)]
    axs2 = [plt.subplot2grid(grid_size, (2, col), colspan=1, rowspan=1)
            for col in range(3)]

    fig.tight_layout(rect=[0.075, 0.1, 0.95, 0.945])
    plt.subplots_adjust(hspace=0.1, wspace=0.3)

    for ax, (group_0, group_120), name, origin in zip(axs, nuc_groups,
                                                      names, origins):
        data = group_120 - group_0

        if head is not None:
            data = data.head(head)
        elif tail is not None:
            data = data.tail(tail)

        ax.imshow(data, vmin=-5, vmax=5, aspect=300. / len(data),
                  cmap='RdBu_r', extent=[-500, 500, 0, len(data)])
        ax.set_xlim(-50 + origin, 150 + origin)
        ax.set_yticks([])
        ax.set_xticks([])
        ax.set_title(name)
        ax.axvline(0, color='black', linestyle='dashed', linewidth=1)

    plot_ends_comparison(axs2,
                         orf_0_nuc_mid_counts, orf_120_nuc_mid_counts,
                         orf_0_nuc_start_counts, orf_120_nuc_start_counts,
                         orf_0_nuc_stop_counts, orf_120_nuc_stop_counts,
                         head=head, tail=tail)

    if head is not None:
        title = ("Greatest %d %s nucleosome\nfragments shift, 0-120 min" %
                 (head, "downstream"))
    elif tail is not None:
        title = ("Greatest %d %s nucleosome\nfragments shift, 0-120 min" %
                 (tail, "upstream"))
    else:
        # Default call: there is no count to report, and formatting None
        # with "%d" used to raise TypeError.
        title = "Nucleosome\nfragments shift, 0-120 min"

    plt.suptitle(title)