def size_dist(inputs, paths_in, paths_out):
    """Plot read-length distributions: one overlay figure plus one per sample.

    For every name in ``inputs['files']`` the object stored at
    ``<path_analysis><name>/readQC/read_distribution`` is loaded with
    ``ribo_util.unPickle`` and converted with ``ribo_util.dict_to_df``
    (columns ``'Length'`` / ``'fraction of total'``).

    Files written:
      * ``<path_figures>Comparison/size_dist/sizedist_<name1>_<name2>_..._.pdf``
        with all samples overlaid; this figure is also passed to
        ``plt.show()``.
      * ``<path_figures><name>/sizedist.pdf`` for each sample.
      * ``<path_analysis><name>/readQC/size_plot_values.csv`` for each sample.

    This function does not create any of the output directories.

    Args:
        inputs: mapping with a ``'files'`` entry holding the sample names
            (strings; they are concatenated into paths and file names).
        paths_in: not used by this function.
        paths_out: mapping with ``'path_figures'`` and ``'path_analysis'``
            path prefixes (used by plain string concatenation, so they are
            expected to end with a path separator).

    Returns:
        None.
    """
    files = inputs['files']
    path_figure = paths_out['path_figures']

    sns.set_style("white")
    plt.figure(figsize=(5, 3))

    # Load each sample exactly once; the per-sample figures further down
    # reuse the same data instead of unpickling it a second time.
    samples = []
    for fname in files:
        path_analysis = paths_out['path_analysis'] + fname + '/readQC/'
        data = ribo_util.unPickle(path_analysis + 'read_distribution')
        df = ribo_util.dict_to_df(data, 'Length', 'fraction of total')
        samples.append((fname, path_analysis, df))
        plt.plot(df, label=fname)

    # Axis decoration does not depend on the sample, so it is applied once.
    # It is skipped when nothing was plotted, as before.
    if samples:
        plt.title('Size Distribution')
        plt.xlabel("Read Length")
        plt.ylabel("Percent of Reads")
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

    # Every sample name is followed by '_' (including the last one), which
    # keeps the output file name identical to the historical one.
    naming = ''.join(fname + '_' for fname, _, _ in samples)
    plt.savefig(path_figure + 'Comparison/size_dist' + '/sizedist_' + naming + '.pdf',
                dpi=400, bbox_inches="tight")
    # The overlay figure is left open after show(): with a non-blocking
    # backend, closing it here would remove it from the screen immediately.
    plt.show()

    for fname, path_analysis, df in samples:
        sns.set_style("white")
        fig = plt.figure(figsize=(5, 3))
        try:
            plt.plot(df, label=fname)
            plt.title('Size Distribution')
            plt.xlabel("Read Length")
            plt.ylabel("Percent of Reads")
            plt.legend(loc='upper right')
            plt.savefig(path_figure + fname + '/sizedist.pdf', dpi=400, bbox_inches="tight")
        finally:
            # pyplot keeps every figure alive until it is closed; clearing
            # alone would leave one open figure behind per sample.
            plt.close(fig)

        pd.DataFrame(df).to_csv(path_analysis + 'size_plot_values.csv')
def plot_avggene(inputs, paths_in, paths_out, settings, settings_plot):
    """Plot average-gene profiles around start and stop for each sample.

    For every name in ``inputs['files']`` four objects are loaded with
    ``ribo_util.unPickle`` from ``<path_analysis><name>/avggenes/``:
    ``avg_start_<name_settings>_all``, ``avg_stop_<name_settings>_all``,
    ``avg_start_<name_settings>_HM`` and ``avg_stop_<name_settings>_HM``.
    ``name_settings`` is built from the entries of ``settings`` (see below).

    A 2x2 figure is drawn per sample: the two line plots on top and the two
    heatmaps (index order reversed) below. The figure is saved to
    ``<path_figures><name>/avggene_<name_settings>.pdf`` and passed to
    ``plt.show()``. The line-plot data is also written to ``start_all.csv``
    and ``stop_all.csv`` in the sample's ``avggenes/`` directory.

    Args:
        inputs: mapping with a ``'files'`` entry holding the sample names.
        paths_in: not used by this function.
        paths_out: mapping with ``'path_figures'`` and ``'path_analysis'``
            path prefixes (joined by plain string concatenation).
        settings: mapping with ``'length_in_ORF'``, ``'length_out_ORF'``,
            ``'next_gene'``, ``'threshold'``, ``'density_type'``,
            ``'equal_weight'``, ``'minlength'`` and ``'maxlength'``.
            ``'density_type'`` and ``'equal_weight'`` must be strings; the
            others are converted with ``str``.
        settings_plot: mapping with ``'HM_max'`` (heatmap ``vmax``) and
            ``'ymax'`` (upper y-limit of the line plots). A ``'ymax'`` of 0
            requests automatic scaling: each sample then uses the larger of
            its own start/stop ``"Reads"`` maxima.

    Returns:
        None.
    """
    files = inputs['files']
    hmmax = settings_plot['HM_max']
    ymax_setting = settings_plot['ymax']
    path_figure = paths_out['path_figures']

    # Suffix shared by the input pickles and the output PDF. Every field is
    # followed by '_' (including the last one).
    name_settings = (
        str(settings['length_in_ORF']) + '_'
        + str(settings['length_out_ORF']) + '_'
        + str(settings['next_gene']) + '_'
        + str(settings['threshold']) + '_'
        + settings['density_type'] + '_'
        + settings['equal_weight'] + '_'
        + str(settings['minlength']) + '_'
        + str(settings['maxlength']) + '_'
    )

    for fname in files:
        sns.set_style("white")
        plt.figure(figsize=(20, 5))

        path_analysis = paths_out['path_analysis'] + fname + '/avggenes/'

        data_start = ribo_util.unPickle(path_analysis + 'avg_start_' + name_settings + '_all')
        data_stop = ribo_util.unPickle(path_analysis + 'avg_stop_' + name_settings + '_all')
        data_startHM = ribo_util.unPickle(path_analysis + 'avg_start_' + name_settings + '_HM')
        data_stopHM = ribo_util.unPickle(path_analysis + 'avg_stop_' + name_settings + '_HM')

        # The x-range of the line plots is the number of entries in the
        # start data, taken before it is converted to a DataFrame.
        xmax = len(data_start)

        data_start = ribo_util.dict_to_df(data_start, 'Position', 'Reads')
        data_stop = ribo_util.dict_to_df(data_stop, 'Position', 'Reads')
        data_startHM = ribo_util.heatmapdict_to_df(data_startHM, 'Length', 'Position', 'composition')
        data_stopHM = ribo_util.heatmapdict_to_df(data_stopHM, 'Length', 'Position', 'composition')

        # Reverse the row order of the heatmap frames before drawing.
        data_startHM = data_startHM.reindex(index=data_startHM.index[::-1])
        data_stopHM = data_stopHM.reindex(index=data_stopHM.index[::-1])

        data_start.to_csv(path_analysis + 'start_all.csv')
        data_stop.to_csv(path_analysis + 'stop_all.csv')

        max_start = data_start["Reads"].max()
        max_stop = data_stop["Reads"].max()

        # Resolve the y-limit into a per-sample variable. Writing the
        # automatic value back into the shared setting would make every
        # later sample reuse the first sample's peak.
        if ymax_setting == 0:
            ymax = max_start if max_start > max_stop else max_stop
        else:
            ymax = ymax_setting

        # Top row: line plots in subplot slots 1 and 2.
        line_panels = (('Start', data_start), ('Stop', data_stop))
        for plot_num, (graph, data) in enumerate(line_panels, 1):
            plt.subplot(2, 2, plot_num)
            plt.plot(
                data,
                sns.xkcd_rgb["dark grey"],
            )
            plt.title(fname + ' ' + graph)
            plt.ylabel("Reads")
            plt.ylim(0, ymax)
            plt.xlim(0, xmax)
            sns.despine()

        # Bottom row: heatmaps in subplot slots 3 and 4.
        for plot_num, dataHM in enumerate((data_startHM, data_stopHM), 3):
            plt.subplot(2, 2, plot_num)
            plot = sns.heatmap(dataHM, cmap="ocean_r", vmin=0, vmax=hmmax, cbar=False)
            # Thin out the tick labels: every 10th on x, every 4th on y.
            plt.setp(plot.get_xticklabels(), visible=False)
            plt.setp(plot.get_xticklabels()[0::10], visible=True)
            plt.setp(plot.get_yticklabels(), visible=False)
            plt.setp(plot.get_yticklabels()[0::4], visible=True)

        plt.savefig(path_figure + fname + '/avggene_' + name_settings + '.pdf', dpi=400)
        plt.show()
def plot_avggene_end(inputs, paths_in, paths_out, settings):
    """Display the '_end' average-gene line plots and heatmaps per sample.

    For every name in ``inputs['files']`` the objects ``avg_start_all_end``,
    ``avg_stop_all_end``, ``avg_start_HM_end`` and ``avg_stop_HM_end`` are
    loaded with ``ribo_util.unPickle`` from ``<path_analysis><name>/``.
    A 2x2 figure is drawn (line plots on top, heatmaps below) and passed to
    ``plt.show()``; no figure file is saved. The line-plot data is written to
    ``start_all.csv`` and ``stop_all.csv`` in the same sample directory.

    Args:
        inputs: mapping with a ``'files'`` entry holding the sample names.
        paths_in: not used by this function.
        paths_out: mapping with a ``'path_analysis'`` path prefix.
        settings: mapping with ``'HM_max'`` (heatmap ``vmax``) and
            ``'shift'`` (looked up but not otherwise used here).

    Returns:
        None.
    """
    sample_names = inputs['files']
    shift = settings['shift']  # read but never used below
    heatmap_vmax = settings['HM_max']

    for fname in sample_names:
        plt.figure(figsize=(20, 5))
        sample_dir = paths_out['path_analysis'] + fname + '/'

        start_raw = ribo_util.unPickle(sample_dir + 'avg_start_all_end')
        stop_raw = ribo_util.unPickle(sample_dir + 'avg_stop_all_end')
        start_hm_raw = ribo_util.unPickle(sample_dir + 'avg_start_HM_end')
        stop_hm_raw = ribo_util.unPickle(sample_dir + 'avg_stop_HM_end')

        # Right-hand x-limit: number of keys in the start data.
        x_upper = len(start_raw.keys())

        start_df = ribo_util.dict_to_df(start_raw, 'Position', 'Reads')
        stop_df = ribo_util.dict_to_df(stop_raw, 'Position', 'Reads')
        start_hm = ribo_util.heatmapdict_to_df(start_hm_raw, 'Length', 'Position', 'composition')
        stop_hm = ribo_util.heatmapdict_to_df(stop_hm_raw, 'Length', 'Position', 'composition')

        start_df.to_csv(sample_dir + 'start_all.csv')
        stop_df.to_csv(sample_dir + 'stop_all.csv')

        # Both line plots share the larger of the two "Reads" maxima.
        peak_start = start_df["Reads"].max()
        peak_stop = stop_df["Reads"].max()
        y_upper = peak_start if peak_start > peak_stop else peak_stop

        # Subplot slots 1 and 2: line plots.
        for slot, (label, frame) in enumerate((('Start', start_df), ('Stop', stop_df)), 1):
            plt.subplot(2, 2, slot)
            plt.plot(
                frame,
                sns.xkcd_rgb["dark grey"],
            )
            plt.title(fname + ' ' + label)
            plt.ylabel("Reads")
            plt.ylim(0, y_upper)
            plt.xlim(0, x_upper)
            sns.despine()

        # Subplot slots 3 and 4: heatmaps.
        for slot, frame_hm in enumerate((start_hm, stop_hm), 3):
            plt.subplot(2, 2, slot)
            heat_ax = sns.heatmap(frame_hm, cmap="ocean_r", vmin=0, vmax=heatmap_vmax, cbar=False)
            # Show only every 10th x tick label and every 4th y tick label.
            x_labels = heat_ax.get_xticklabels()
            plt.setp(x_labels, visible=False)
            plt.setp(x_labels[0::10], visible=True)
            y_labels = heat_ax.get_yticklabels()
            plt.setp(y_labels, visible=False)
            plt.setp(y_labels[0::4], visible=True)

        plt.show()