def make_qualimap_plots(qmglobals=None, coverage_per_contig=None):
    """Build the qualimap summary figures

    Args:
      qmglobals: csv input read with pandas for the mapping summary
        plot; the plot is skipped when None
      coverage_per_contig: csv input read with pandas for the
        per-sample coverage plots; the plots are skipped when None

    Returns:
      dict: dictionary with keys 'fig', 'file' and 'uri', each holding
        a dictionary with keys 'coverage_per_contig' and 'globals'
    """
    figures = {"coverage_per_contig": None, "globals": None}
    files = {"coverage_per_contig": coverage_per_contig, "globals": qmglobals}
    uris = {
        "coverage_per_contig": data_uri(coverage_per_contig),
        "globals": data_uri(qmglobals),
    }
    retval = {"fig": figures, "file": files, "uri": uris}

    # Mapping summary: read counts per sample
    if qmglobals is not None:
        summary = pd.read_csv(qmglobals)
        summary["Sample"] = summary["Sample"].astype("str")
        read_columns = [
            "number of reads",
            "number of mapped reads",
            "number of duplicated reads",
            "number of unique reads",
        ]
        top = max(summary["number of reads"])
        sample_names = sorted(set(summary["Sample"]))
        summary_fig = figure(
            title="Mapping summary",
            title_text_font_size="12pt",
            plot_width=400,
            plot_height=400,
            x_range=sample_names,
            y_range=[0, top],
        )
        mdotplot(summary_fig, df=summary, x="Sample", y=read_columns,
                 size=10, alpha=0.5)
        xaxis(summary_fig, axis_label="sample",
              major_label_orientation=np.pi / 3,
              axis_label_text_font_size="10pt")
        yaxis(summary_fig, axis_label="count",
              major_label_orientation=1,
              axis_label_text_font_size="10pt")
        figures["globals"] = summary_fig

    # Coverage per contig: one facet per sample
    if coverage_per_contig is not None:
        coverage = pd.read_csv(coverage_per_contig, index_col=0)
        coverage["Sample"] = coverage["Sample"].astype("str")
        xcol = "chrlen_percent"
        ycol = "mapped_bases_percent"
        template = figure(width=300, height=300)
        points(template, x=xcol, y=ycol, df=coverage,
               glyph="text", text="chr", text_font_size="8pt")
        main(template, title_text_font_size="8pt")
        xaxis(template, axis_label="Chromosome length of total (%)",
              axis_label_text_font_size="8pt")
        yaxis(template, axis_label="Mapped bases of total (%)",
              axis_label_text_font_size="8pt")
        gp = facet_grid(
            template,
            x=xcol,
            y=ycol,
            df=coverage,
            groups=["Sample"],
            width=300,
            height=300,
            share_x_range=True,
            share_y_range=True,
            title_text_font_size="12pt",
        )
        # Add a slope-1 reference line to every facet of the grid
        facets = [facet for row in gp.children for facet in row]
        for facet in facets:
            abline(facet, x=xcol, y=ycol, df=coverage, slope=1)
        figures["coverage_per_contig"] = gp

    return retval
def plot_metrics(self, **kwargs):
    """Plot metrics wrapper

    Creates one figure per entry in ``self.plots``. Every entry is
    indexed with the keys 'figure', 'renderer', 'xaxis' and 'yaxis',
    whose values are expanded as keyword arguments to figure(),
    dotplot(), xaxis() and yaxis() respectively.

    Args:
      kwargs: keyword arguments passed on to figure() for every plot;
        the entry's own 'figure' settings take precedence

    Returns:
      plist (list): list of bokeh plot objects
    """
    plist = []
    for kw in self.plots:
        # Merge into a fresh dict for each plot. Updating kwargs in
        # place would leak the figure settings of one plot into all
        # the plots that follow it.
        figure_kwargs = dict(kwargs)
        figure_kwargs.update(kw['figure'])
        fig = figure(**figure_kwargs)
        dotplot(fig, df=self, **kw['renderer'])
        xaxis(fig, **kw['xaxis'])
        yaxis(fig, **kw['yaxis'])
        plist.append(fig)
    return plist
def make_cutadapt_summary_plot(inputfile):
    """Draw the cutadapt metrics dot plot

    Args:
      inputfile: csv input read with pandas; the columns 'sample',
        'read1_pct' and 'read2_pct' are used

    Returns:
      dict: dictionary with keys 'fig' (the figure), 'uri' (the result
        of data_uri on inputfile) and 'file' (inputfile itself)
    """
    metrics = pd.read_csv(inputfile)
    metrics["sample"] = metrics["sample"].astype("str")
    sample_names = sorted(set(metrics["sample"]))

    tool_names = "pan,wheel_zoom,box_zoom,box_select,reset,save"
    plot = figure(
        title="Cutadapt metrics",
        title_text_font_size='12pt',
        tools=tool_names,
        width=400,
        height=400,
        x_range=sample_names,
        y_range=[0, 105],
    )
    mdotplot(plot, df=metrics, x="sample", y=["read1_pct", "read2_pct"],
             size=10, alpha=0.5)
    xaxis(plot, axis_label="sample",
          major_label_orientation=np.pi/3,
          axis_label_text_font_size='10pt')
    yaxis(plot, axis_label="percent reads",
          major_label_orientation=1,
          axis_label_text_font_size='10pt')

    result = {'fig': plot}
    result['uri'] = data_uri(inputfile)
    result['file'] = inputfile
    return result
def scrnaseq_pca_plots(pca_results_file=None, metadata=None, pcacomp=(1,2), pcaobjfile=None):
    """Make PCA QC plots for scrnaseq workflow

    Args:
      pca_results_file (str): pca results file; read as csv with the
        'sample' column as index. Component k is read from the column
        named str(k - 1).
      metadata (str): metadata file name; when given, one toggle
        button per metadata column is added above the plots
      pcacomp (tuple): tuple of ints corresponding to components to draw
      pcaobjfile (str): file name containing pickled pca object; when
        given, the explained variance is added to the axis labels

    Returns:
      dict: dictionary with key 'fig' pointing to a vform layout that
        holds the metadata toggle buttons followed by a gridplot with
        the PCA figure and the detected genes figure
    """
    # Bind both optional inputs up front; they are tested further down
    # even when the corresponding file name was not supplied.
    md = None
    if metadata is not None:
        md = pd.read_csv(metadata, index_col=0)
    pcaobj = None
    if pcaobjfile is not None:
        # NOTE: unpickling executes arbitrary code; pcaobjfile must
        # come from a trusted source.
        with open(pcaobjfile, 'rb') as fh:
            pcaobj = pickle.load(fh)
    df_pca = pd.read_csv(pca_results_file, index_col="sample")
    df_pca['color'] = ['red'] * df_pca.shape[0]
    # Plot the requested components so that the data agree with the
    # axis labels (columns are 0-based, pcacomp is 1-based).
    df_pca['x'] = df_pca[str(pcacomp[0] - 1)]
    df_pca['y'] = df_pca[str(pcacomp[1] - 1)]

    source = ColumnDataSource(df_pca)
    TOOLS = "pan,wheel_zoom,box_zoom,box_select,resize,reset,save,hover"

    # Callback for a radio button group. Currently unused: kept for
    # the RadioButtonGroup that is commented out below.
    callback_rbg = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var active = cb_obj.get('active');
        var label = cb_obj.get('label')[active];
        var RdYlBu = {
            3: ["#fc8d59","#ffffbf","#91bfdb"],
            4: ["#d7191c","#fdae61","#abd9e9","#2c7bb6"],
            5: ["#d7191c","#fdae61","#ffffbf","#abd9e9","#2c7bb6"],
            6: ["#d73027","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4"],
            7: ["#d73027","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4"],
            8: ["#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4"],
            9: ["#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4"],
            10: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"],
            11: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"]};
        var colormap = {};
        var j = 0;
        for (i = 0; i < data['sample'].length; i++) {
            if (data[label][i] in colormap) {
            } else {
                colormap[data[label][i]] = j;
                j++;
            }
        }
        var nfac = Object.keys(colormap).length;
        if (nfac > 11) {
            nfac = 11;
        }
        if (nfac < 3) {
            nfac = 3;
        }
        var colors = RdYlBu[nfac];
        for (i = 0; i < data[label].length; i++) {
            data['color'][i] = colors[colormap[data[label][i]]];
        }
        source.trigger('change');
    """)
    # Callback for the toggle buttons: recolor the points by the
    # values of the column named by the button label.
    callback = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var active = cb_obj.get('active');
        var label = cb_obj.get('label');
        var RdYlBu = {
            3: ["#fc8d59","#ffffbf","#91bfdb"],
            4: ["#d7191c","#fdae61","#abd9e9","#2c7bb6"],
            5: ["#d7191c","#fdae61","#ffffbf","#abd9e9","#2c7bb6"],
            6: ["#d73027","#fc8d59","#fee090","#e0f3f8","#91bfdb","#4575b4"],
            7: ["#d73027","#fc8d59","#fee090","#ffffbf","#e0f3f8","#91bfdb","#4575b4"],
            8: ["#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4"],
            9: ["#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4"],
            10: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"],
            11: ["#a50026","#d73027","#f46d43","#fdae61","#fee090","#ffffbf","#e0f3f8","#abd9e9","#74add1","#4575b4","#313695"]};
        var colormap = {};
        if (!active) {
            var j = 0;
            for (i = 0; i < data['sample'].length; i++) {
                if (data[label][i] in colormap) {
                } else {
                    colormap[data[label][i]] = j;
                    j++;
                }
            }
            var nfac = Object.keys(colormap).length;
            if (nfac > 11) {
                nfac = 11;
            }
            if (nfac < 3) {
                nfac = 3;
            }
            var colors = RdYlBu[nfac];
            for (i = 0; i < data[label].length; i++) {
                data['color'][i] = colors[colormap[data[label][i]]];
            }
            source.trigger('change');
        }
    """)
    if md is not None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # rbg = RadioButtonGroup(labels=list(md.columns),
        #                        callback=callback)
        toggle_buttons = [Toggle(label=x, callback=callback) for x in list(md.columns)]
    else:
        toggle_buttons = []
        # rbg = RadioButtonGroup()

    # PC components
    # NOTE(review): the two dropdowns below are created but never added
    # to the returned layout. xcallback also overwrites the 'sample'
    # and 'FileID' columns, which looks like leftover debugging, and
    # the 1-based menu values are used directly as 0-based column
    # names - confirm the intent before wiring the dropdowns in.
    xcallback = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var active = cb_obj.get('active');
        var value = cb_obj.get('value');
        x = data['x'];
        for (i = 0; i < x.length; i++) {
            x[i] = data[value][i];
            data['sample'][i] = value;
            data['FileID'][i] = active;
        }
        source.trigger('change');
    """)
    ycallback = CustomJS(args=dict(source=source), code="""
        var data = source.get('data');
        var value = cb_obj.get('value');
        y = data['y'];
        for (i = 0; i < y.length; i++) {
            y[i] = data[value][i];
        }
        source.trigger('change');
    """)
    # Component columns are the purely numeric column names; anchor
    # the pattern so that int() is only applied to such names.
    pca_components = sorted([int(x) + 1 for x in source.column_names if re.match(r"\d+$", x)])
    menulist = [(str(x), str(x)) for x in pca_components]
    component_x = Dropdown(label="PCA component x", menu=menulist, default_value="1", callback=xcallback)
    component_y = Dropdown(label="PCA component y", menu=menulist, default_value="2", callback=ycallback)

    def _component_label(comp):
        # The explained variance is only known when a pca object was loaded
        label = "Component {}".format(comp)
        if pcaobj is not None:
            label += " ({:.2f}%)".format(
                100.0 * pcaobj.explained_variance_ratio_[comp - 1])
        return label

    # Make the pca plot
    kwfig = {'plot_width': 400, 'plot_height': 400,
             'title_text_font_size': "12pt"}
    p1 = figure(title="Principal component analysis", tools=TOOLS, **kwfig)
    points(p1, 'x', 'y', source=source, color='color', size=10, alpha=.7)
    kwxaxis = {'axis_label': _component_label(pcacomp[0]),
               'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}
    kwyaxis = {'axis_label': _component_label(pcacomp[1]),
               'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}
    xaxis(p1, **kwxaxis)
    yaxis(p1, **kwyaxis)
    tooltiplist = [("sample", "@sample")] if "sample" in source.column_names else []
    if md is not None:
        tooltiplist = tooltiplist + [(str(x), "@{}".format(x)) for x in md.columns]
    tooltips(p1, HoverTool, tooltiplist)

    # Detected genes, FPKM and TPM
    p2 = figure(title="Number of detected genes", x_range=list(df_pca.index), tools=TOOLS, **kwfig)
    kwxaxis.update({'axis_label': "Sample"})
    kwyaxis.update({'axis_label': "Detected genes"})
    dotplot(p2, "sample", "FPKM", source=source)
    xaxis(p2, **kwxaxis)
    yaxis(p2, **kwyaxis)
    tooltips(p2, HoverTool, [('sample', '@sample'),
                             ('# genes (FPKM)', '@FPKM')])
    return {'fig': vform(*(toggle_buttons + [gridplot([[p1, p2]])]))}
def scrnaseq_alignment_qc_plots(rseqc_read_distribution=None, rseqc_gene_coverage=None, star_results=None):
    """Make alignment QC plots for scrnaseq workflow

    The three inputs are read as csv files with a 'Sample' column and
    joined on that column into one data source that is shared by the
    table and by all plots.

    Args:
      rseqc_read_distribution (str): RSeQC read distribution results csv file
      rseqc_gene_coverage (str): RSeQC gene coverage results csv file
      star_results (str): star alignment results csv file

    Returns:
      dict: dictionary with keys 'fig' pointing to a (:py:class:`~bokeh.models.GridPlot`) Bokeh GridPlot object and key 'table' pointing to a (:py:class:`~bokeh.widgets.DataTable`) DataTable
    """
    df_star = pd.read_csv(star_results, index_col="Sample")
    # One row per sample, one Tag_count column per read distribution group
    df_rseqc_rd = pd.read_csv(rseqc_read_distribution, index_col="Sample").reset_index().pivot_table(columns=["Group"], values=["Tag_count"], index=["Sample"])
    # Flatten the (value, group) column tuples to names like Tag_count_<group>
    df_rseqc_rd.columns = ["_".join(x) if isinstance(x, tuple) else x for x in df_rseqc_rd.columns]
    df_rseqc_gc = pd.read_csv(rseqc_gene_coverage, index_col="Sample")
    df_all = df_star.join(df_rseqc_rd)
    df_all = df_all.join(df_rseqc_gc['three_prime_map'])
    source = ColumnDataSource(df_all)
    columns = [
        TableColumn(field="Sample", title="Sample"),
        TableColumn(field="Number_of_input_reads",
                    title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT",
                    title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base__PCT",
                    title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base",
                    title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base",
                    title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped",
                    title="Unmapped reads (%)"),
    ]
    table = DataTable(source=source, columns=columns,
                      editable=False, width=1000)
    TOOLS = "pan,wheel_zoom,box_zoom,box_select,lasso_select,resize,reset,save,hover"
    kwfig = {'plot_width': 400, 'plot_height': 400,
             'title_text_font_size': "12pt"}
    kwxaxis = {'axis_label': 'Sample',
               'major_label_orientation': np.pi/3}
    kwyaxis = {'axis_label_text_font_size': '10pt',
               'major_label_orientation': np.pi/3}

    # Input reads
    p1 = figure(title="Number of input reads",
                x_range=list(df_all.index), tools=TOOLS,
                y_axis_type="log", **kwfig)
    dotplot(p1, "Sample", "Number_of_input_reads", source=source)
    xaxis(p1, **kwxaxis)
    yaxis(p1, axis_label="Reads", **kwyaxis)
    tooltips(p1, HoverTool, [('Sample', '@Sample'),
                             ('Reads', '@Number_of_input_reads')])

    # Uniquely mapping; all following plots share the x range of p1
    p2 = figure(title="Uniquely mapping reads",
                x_range=p1.x_range, y_range=[0, 100],
                tools=TOOLS, **kwfig)
    dotplot(p2, "Sample", "Uniquely_mapped_reads_PCT", source=source)
    xaxis(p2, **kwxaxis)
    yaxis(p2, axis_label="Percent", **kwyaxis)
    tooltips(p2, HoverTool, [('Sample', '@Sample'),
                             ('Pct_mapped', '@Uniquely_mapped_reads_PCT')])

    # Unmapped
    p3 = figure(title="Unmapped reads",
                x_range=p1.x_range, y_range=[0, 100],
                tools=TOOLS, **kwfig)
    dotplot(p3, "Sample", "PCT_of_reads_unmapped", source=source)
    xaxis(p3, **kwxaxis)
    yaxis(p3, axis_label="Percent", **kwyaxis)
    tooltips(p3, HoverTool, [('Sample', '@Sample'),
                             ('Pct_unmapped', '@PCT_of_reads_unmapped')])

    # Mismatch/indel rate
    p4 = figure(title="Mismatch and indel rates",
                x_range=p1.x_range, tools=TOOLS, **kwfig)
    dotplot(p4, "Sample", "Mismatch_rate_per_base__PCT", source=source, legend="Mismatch")
    dotplot(p4, "Sample", "Insertion_rate_per_base", source=source, legend="Insertion", color="red")
    dotplot(p4, "Sample", "Deletion_rate_per_base", source=source, legend="Deletion", color="green")
    xaxis(p4, **kwxaxis)
    yaxis(p4, axis_label="Percent", **kwyaxis)
    # The sample field is named 'Sample', as in all other tooltips
    tooltips(p4, HoverTool, [('Sample', '@Sample'),
                             ('Mismatch rate per base', '@Mismatch_rate_per_base__PCT'),
                             ('Insertion rate per base', '@Insertion_rate_per_base'),
                             ('Deletion rate per base', '@Deletion_rate_per_base'), ])
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Mismatch/indel sum
    # NOTE(review): 'mismatch_sum' is not created in this function;
    # presumably it is a column of star_results - confirm.
    p5 = figure(title="Mismatch/indel sum",
                x_range=p1.x_range, tools=TOOLS, **kwfig)
    dotplot(p5, "Sample", "mismatch_sum", source=source)
    xaxis(p5, **kwxaxis)
    yaxis(p5, axis_label="Percent", **kwyaxis)
    tooltips(p5, HoverTool, [('Sample', '@Sample'),
                             ('Mismatch/indel rate per base', '@mismatch_sum'), ])
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Tags mapping to exons
    p6 = figure(title="Tags mapping to exons",
                x_range=p1.x_range, tools=TOOLS, **kwfig)
    dotplot(p6, "Sample", "Tag_count_ExonMap", source=source)
    xaxis(p6, **kwxaxis)
    yaxis(p6, axis_label="Percent", **kwyaxis)
    tooltips(p6, HoverTool, [('Sample', '@Sample'),
                             ('ExonMap', '@Tag_count_ExonMap'), ])

    # Fraction reads mapping to 10% right-most end
    p7 = figure(title="Reads mapping to 3' end",
                x_range=p1.x_range, tools=TOOLS, **kwfig)
    dotplot(p7, "Sample", "three_prime_map", source=source)
    xaxis(p7, **kwxaxis)
    yaxis(p7, axis_label="Percent", **kwyaxis)
    tooltips(p7, HoverTool, [('Sample', '@Sample'),
                             ("3' map", '@three_prime_map'), ])

    return {'fig': gridplot([[p1, p2, p3], [p4, p5, p6], [p7, None, None]]),
            'table': table}