def atacseq_qualimap_plot_coverage_per_contig(df, **kwargs):
    """Scatter plot of mapped-bases share against chromosome-length share.

    Each point is drawn from one row of ``df``; points are colored by the
    ``SM`` column and the hover tool shows ``SM`` and ``chr``.

    Args:
      df: data handed to ``points``; the columns ``chrlen_percent``,
        ``mapped_bases_percent``, ``SM`` and ``chr`` are referenced.
      kwargs (dict): accepted for interface compatibility; not used here.

    Returns:
      the object returned by ``points``, with axis labels and hover
      tooltips applied.
    """
    # Collect the glyph/figure options first so the call itself stays short.
    scatter_options = {
        "df": df,
        "x": "chrlen_percent",
        "y": "mapped_bases_percent",
        "glyph": "circle",
        "return_source": True,
        "legend": "top_right",
        "tools": DEFAULT_TOOLS,
        "size": 6,
        "alpha": 0.9,
        "color": "SM",
        "plot_height": 600,
        "plot_width": 800,
    }
    figure = points(**scatter_options)

    figure.xaxis.axis_label = "chrlen (% of genome)"
    figure.yaxis.axis_label = "mapped reads (% of total)"

    hover_fields = [("Sample", "@SM"), ("Chromosome", "@chr")]
    tooltips(figure, HoverTool, hover_fields)
    return figure
def atacseq_qualimap_plot_coverage_per_contig(df, **kwargs):
    """Plot qualimap coverage per contig.

    Draws ``mapped_bases_percent`` (y) against ``chrlen_percent`` (x) as
    circles colored by sample (``SM``), and attaches a hover tool showing
    the sample and chromosome of each point.

    Args:
      df(:class:`~pandas.DataFrame`): data frame holding summary of
        qualimap results. The summary is provided by
        :meth:`snakemakelib.odo.qualimap.resource_genome_results`.
      kwargs(dict): keyword arguments (currently not used by this function)

    Returns:
      :class:`bokeh.plotting.figure`: bokeh scatter plot

    Examples:

    .. bokeh-plot::
        :source-position: above

        import pandas as pd
        from bokeh.plotting import show
        from snakemakelib_workflows.atacseq.app import atacseq_qualimap_plot_coverage_per_contig

        df = pd.DataFrame([['S1', 'chr1', 4e6, 8e6, 2, 5, 80, 40],
                           ['S1', 'chr2', 1e6, 12e6, 12, 20, 20, 60],
                           ['S2', 'chr1', 4e6, 12e6, 3, 5, 80, 60],
                           ['S2', 'chr2', 1e6, 8e6, 8, 20, 20, 40]],
                          columns=['SM', 'chr','chrlen', 'mapped_bases',
                                   'mean_coverage', 'sd', 'chrlen_percent',
                                   'mapped_bases_percent'])
        p = atacseq_qualimap_plot_coverage_per_contig(df)
        show(p)
    """
    hover_spec = [('Sample', '@SM'), ('Chromosome', '@chr')]
    x_label = "chrlen (% of genome)"
    y_label = "mapped reads (% of total)"

    scatter = points(
        df=df,
        x="chrlen_percent",
        y="mapped_bases_percent",
        glyph="circle",
        return_source=True,
        legend="top_right",
        tools=DEFAULT_TOOLS,
        size=6,
        alpha=0.9,
        color="SM",
        plot_height=600,
        plot_width=800,
    )
    scatter.xaxis.axis_label = x_label
    scatter.yaxis.axis_label = y_label
    tooltips(scatter, HoverTool, hover_spec)
    return scatter
def plot_pca(pcaobjfile, pca_results_file=None, metadata=None, taptool_url=None, **kwargs):
    """Make PCA plot

    Builds three linked figures (PCA scores, PCA loadings and number of
    detected genes per sample) together with the widgets that select which
    components are shown.

    Args:
      pcaobjfile (str): file name containing pickled pca object; the
        object must expose ``explained_variance_ratio_``, ``components_``,
        ``features`` and ``labels``
      pca_results_file (str): pca results file (csv); read with
        :func:`pandas.read_csv`. Columns ``0`` and ``1`` are used as the
        initial x and y coordinates, and ``SM``, ``TPM`` and ``FPKM`` are
        referenced by the detected-genes plot
      metadata (str): metadata file name (csv); when given, one toggle
        button and one hover entry is added per metadata column
      taptool_url (str): url prefix that is attached to taptool;
        typically a link to ensembl
      kwargs (dict): only ``index_col`` (default ``"SM"``), ``color``
        (default ``'red'``) and ``size`` (default ``10``) are read

    Returns:
      dict: dictionary with the single key 'pca' pointing to the
      ``vform`` layout that stacks the widgets on top of a ``gridplot``
      holding the three figures

    Raises:
      Exception: whatever is raised while assigning ``gene_id`` or
        ``gene_name`` to the loadings is logged and re-raised unchanged.
    """
    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file; only call this on PCA objects produced by a trusted pipeline.
    with open(pcaobjfile, 'rb') as fh:
        pcaobj = pickle.load(fh)

    md = None
    if metadata is not None:
        md = pd.read_csv(metadata, index_col=0)

    df_pca = pd.read_csv(pca_results_file, index_col=kwargs.get('index_col', "SM"))
    n_samples = df_pca.shape[0]
    df_pca['color'] = [kwargs.get('color', 'red')] * n_samples
    df_pca['x'] = df_pca['0']
    df_pca['y'] = df_pca['1']
    df_pca['size'] = [kwargs.get('size', 10)] * n_samples
    pca_source = ColumnDataSource(df_pca)
    # NOTE(review): the palette is not used below; the call is kept in case
    # colorbrewer validates its input or has side effects - confirm and drop.
    colorbrewer(datalen=n_samples)

    callback = CustomJS(args=dict(source=pca_source),
                        code="""pca_callback(source, cb_obj, "SM");""")
    xcallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "x");""")
    ycallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "y");""")

    if md is not None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # rbg = RadioButtonGroup(labels=list(md.columns),
        #                        callback=callback)
        toggle_buttons = [Toggle(label=x, callback=callback)
                          for x in list(md.columns) + ["TPM", "FPKM"]]
    else:
        toggle_buttons = []

    # Component columns are the purely numeric column names ("0", "1", ...);
    # fullmatch keeps names that merely start with a digit away from int().
    pca_components = sorted(int(x) + 1 for x in pca_source.column_names
                            if re.fullmatch(r"\d+", x))
    menulist = ["{} ({:.2f}%)".format(x, 100.0 * p)
                for x, p in zip(pca_components, pcaobj.explained_variance_ratio_)]
    component_x = Select(title="PCA component x", options=menulist,
                         value=menulist[0], callback=xcallback)
    component_y = Select(title="PCA component y", options=menulist,
                         value=menulist[1], callback=ycallback)

    # Make the pca plot. These figure options are independent of the
    # caller-supplied **kwargs, which are not forwarded to the plots.
    plot_kwargs = {
        'plot_width': 400,
        'plot_height': 400,
        'title_text_font_size': "12pt",
        'title': "Principal component analysis",
        'tools': TOOLS,
        'x_axis_label_text_font_size': '10pt',
        'x_major_label_orientation': np.pi/3,
        'y_axis_label_text_font_size': '10pt',
        'y_major_label_orientation': np.pi/3,
    }
    fig = points('x', 'y', df=pca_source, color='color', size='size',
                 alpha=0.7, **plot_kwargs)
    tooltiplist = [("sample", "@SM")] if "SM" in pca_source.column_names else []
    if md is not None:
        tooltiplist = (
            tooltiplist
            + [(str(x), "@{}".format(x)) for x in md.columns]
            + [("Detected genes (TPM)", "@TPM"), ("Detected genes (FPKM)", "@FPKM")]
        )
    tooltips(fig, HoverTool, tooltiplist)

    # Loadings
    loadings = pd.DataFrame(pcaobj.components_).T
    loadings.columns = [str(x) for x in loadings.columns]
    loadings['x'] = loadings['0']
    loadings['y'] = loadings['1']
    try:
        loadings["gene_id"] = pcaobj.features
    except Exception:
        smllogger.warn("failed to set gene_id")
        raise
    try:
        loadings["gene_name"] = [pcaobj.labels[x] for x in loadings["gene_id"]]
    except Exception:
        smllogger.warn("failed to set gene_name")
        raise
    loadings_source = ColumnDataSource(loadings)
    plot_kwargs.update({'title': "Loadings"})
    loadings_fig = points(x='x', y='y', df=loadings_source, **plot_kwargs)
    tooltips(loadings_fig, HoverTool,
             [('gene_id', '@gene_id'), ('gene_name', '@gene_name')])
    x_loadings_callback = CustomJS(args=dict(source=loadings_source),
                                   code="""pca_loadings(source, cb_obj, "x");""")
    y_loadings_callback = CustomJS(args=dict(source=loadings_source),
                                   code="""pca_loadings(source, cb_obj, "y");""")
    # Same labels as the score selectors; copied so the widgets do not
    # share one list object.
    loadings_menu = list(menulist)
    loadings_component_x = Select(title="PCA loading x", options=loadings_menu,
                                  value=loadings_menu[0],
                                  callback=x_loadings_callback)
    loadings_component_y = Select(title="PCA loading y", options=loadings_menu,
                                  value=loadings_menu[1],
                                  callback=y_loadings_callback)

    # Add taptool url if present
    if taptool_url:
        loadings_fig.add_tools(TapTool(callback=OpenURL(url=taptool_url)))

    # Detected genes, FPKM and TPM
    plot_kwargs.update({'title': 'Number of detected genes',
                        'xlabel': "Sample",
                        'ylabel': "Detected genes",
                        'x_range': list(pca_source.data["SM"])})
    n_genes_fig = dotplot(df=pca_source, x="SM", y="TPM", **plot_kwargs)
    tooltips(n_genes_fig, HoverTool, [('sample', '@SM'),
                                      ('# genes (TPM)', '@TPM'),
                                      ('# genes (FPKM)', '@FPKM')])

    buttons = (toggle_buttons
               + [component_x, component_y]
               + [loadings_component_x, loadings_component_y])
    return {'pca': vform(*(buttons + [gridplot([[fig, loadings_fig, n_genes_fig]])]))}