Пример #1
0
def atacseq_qualimap_plot_coverage_per_contig(df, **kwargs):
    """Plot mapped bases per contig against contig length.

    Args:
      df: data passed to ``points`` as ``df``; the columns
        ``chrlen_percent``, ``mapped_bases_percent``, ``SM`` and ``chr``
        are referenced by name for the axes, the coloring and the hover
        tooltips.
      kwargs(dict): accepted for interface compatibility; not used here.

    Returns:
      The object returned by ``points``, with axis labels set and a hover
      tooltip showing sample and chromosome attached.
    """
    # Options for the scatter glyphs; everything is fixed by this function.
    scatter_options = {
        "x": "chrlen_percent",
        "y": "mapped_bases_percent",
        "glyph": "circle",
        "return_source": True,
        "legend": "top_right",
        "tools": DEFAULT_TOOLS,
        "size": 6,
        "alpha": 0.9,
        "color": "SM",
        "plot_height": 600,
        "plot_width": 800,
    }
    fig = points(df=df, **scatter_options)

    fig.xaxis.axis_label = "chrlen (% of genome)"
    fig.yaxis.axis_label = "mapped reads (% of total)"

    hover_fields = [("Sample", "@SM"), ("Chromosome", "@chr")]
    tooltips(fig, HoverTool, hover_fields)
    return fig
Пример #2
0
def atacseq_qualimap_plot_coverage_per_contig(df, **kwargs):
    """Scatter plot of qualimap coverage per contig.

    Each point is one (sample, contig) pair: the x coordinate is the contig
    length as a percentage of the genome and the y coordinate is the
    percentage of mapped bases that fall on that contig. Points are colored
    by sample (``SM``).

    Args:
      df(:class:`~pandas.DataFrame`): data frame holding summary of qualimap
        results. The summary is provided by
        :meth:`snakemakelib.odo.qualimap.resource_genome_results`.
      kwargs(dict): keyword arguments; accepted but not used by this function

    Returns:
      :class:`bokeh.plotting.figure`: bokeh scatter plot

    Examples:

    .. bokeh-plot::
        :source-position: above

        import pandas as pd
        from bokeh.plotting import show
        from snakemakelib_workflows.atacseq.app import atacseq_qualimap_plot_coverage_per_contig

        df = pd.DataFrame([['S1', 'chr1', 4e6, 8e6, 2, 5, 80, 40],
                           ['S1', 'chr2', 1e6, 12e6, 12, 20, 20, 60],
                           ['S2', 'chr1', 4e6, 12e6, 3, 5, 80, 60],
                           ['S2', 'chr2', 1e6, 8e6, 8, 20, 20, 40]],
                           columns=['SM', 'chr','chrlen', 'mapped_bases', 'mean_coverage', 'sd', 'chrlen_percent','mapped_bases_percent'])
        p = atacseq_qualimap_plot_coverage_per_contig(df)
        show(p)

    """
    x_label = "chrlen (% of genome)"
    y_label = "mapped reads (% of total)"
    hover_spec = [('Sample', '@SM'), ('Chromosome', '@chr')]

    scatter = points(
        df=df,
        x="chrlen_percent",
        y="mapped_bases_percent",
        glyph="circle",
        return_source=True,
        legend="top_right",
        tools=DEFAULT_TOOLS,
        size=6,
        alpha=0.9,
        color="SM",
        plot_height=600,
        plot_width=800,
    )
    scatter.xaxis.axis_label = x_label
    scatter.yaxis.axis_label = y_label
    tooltips(scatter, HoverTool, hover_spec)
    return scatter
Пример #3
0
def plot_pca(pcaobjfile, pca_results_file=None, metadata=None, taptool_url=None, **kwargs):
    """Make PCA plot

    Builds three linked figures (sample scores, gene loadings and number of
    detected genes per sample) together with the widgets that control them.

    Args:
      pcaobjfile (str): file name containing pickled pca object. The object
        is read for ``explained_variance_ratio_``, ``components_``,
        ``features`` and ``labels``.
      pca_results_file (str): pca results file (csv). It is handed directly
        to :func:`pandas.read_csv`, so a real file name has to be supplied
        even though the parameter defaults to None. The columns ``'0'`` and
        ``'1'`` are used as the initial x and y coordinates.
      metadata (str): metadata file name (csv, first column is the index);
        when given, its columns are added as toggle buttons and tooltips
      taptool_url (str): url prefix that is attached to taptool; typically a link to ensembl
      kwargs (dict): optional settings read by this function:
        ``index_col`` (default ``"SM"``), ``color`` (default ``'red'``) and
        ``size`` (default ``10``)

    Returns:
      dict: dictionary with the single key 'pca' pointing to the ``vform``
      layout that holds the toggle buttons, the component/loading select
      widgets and a one-row ``gridplot`` with the three figures

    Raises:
      Exception: any error raised while assigning ``gene_id`` or
        ``gene_name`` to the loadings is logged as a warning and re-raised
        unchanged.

    """
    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only pass pca object files that come from a trusted source.
    with open(pcaobjfile, 'rb') as fh:
        pcaobj = pickle.load(fh)
    md = None
    if metadata is not None:
        md = pd.read_csv(metadata, index_col=0)
    df_pca = pd.read_csv(pca_results_file, index_col=kwargs.get('index_col', "SM"))
    n_samples = df_pca.shape[0]
    df_pca['color'] = [kwargs.get('color', 'red')] * n_samples
    df_pca['x'] = df_pca['0']
    df_pca['y'] = df_pca['1']
    df_pca['size'] = [kwargs.get('size', 10)] * n_samples
    pca_source = ColumnDataSource(df_pca)

    # The javascript functions named in the callbacks (pca_callback,
    # pca_component, pca_loadings) are not defined here; they have to be
    # available on the page that embeds the plot.
    callback = CustomJS(args=dict(source=pca_source),
                        code="""pca_callback(source, cb_obj, "SM");""")
    xcallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "x");""")
    ycallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "y");""")

    if md is not None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # rbg = RadioButtonGroup(labels=list(md.columns),
        #                        callback=callback)
        toggle_buttons = [Toggle(label=x, callback=callback) for x in list(md.columns) + ["TPM", "FPKM"]]
    else:
        toggle_buttons = []

    # Columns whose name starts with digits hold the principal components;
    # present them 1-based in the menus.
    pca_components = sorted([int(x) + 1 for x in pca_source.column_names if re.match(r"\d+", x)])
    # The same menu entries serve both the score and the loadings selectors.
    menulist = ["{} ({:.2f}%)".format(x, 100.0 * p) for x, p in zip(pca_components, pcaobj.explained_variance_ratio_)]
    component_x = Select(title="PCA component x", options=menulist, value=menulist[0],
                         callback=xcallback)
    component_y = Select(title="PCA component y", options=menulist, value=menulist[1],
                         callback=ycallback)

    # Make the pca plot. The plot options live in their own dict so that the
    # caller's keyword arguments are not shadowed; the dict is updated and
    # reused for the loadings and detected-genes figures below.
    plot_kwargs = {'plot_width': 400, 'plot_height': 400,
                   'title_text_font_size': "12pt",
                   'title': "Principal component analysis",
                   'tools': TOOLS,
                   'x_axis_label_text_font_size': '10pt',
                   'x_major_label_orientation': np.pi/3,
                   'y_axis_label_text_font_size': '10pt',
                   'y_major_label_orientation': np.pi/3,
                   }
    fig = points('x', 'y', df=pca_source, color='color', size='size', alpha=0.7, **plot_kwargs)
    tooltiplist = [("sample", "@SM")] if "SM" in pca_source.column_names else []
    if md is not None:
        tooltiplist = (
            tooltiplist
            + [(str(x), "@{}".format(x)) for x in md.columns]
            + [("Detected genes (TPM)", "@TPM"), ("Detected genes (FPKM)", "@FPKM")]
        )
    tooltips(fig, HoverTool, tooltiplist)

    # Loadings
    loadings = pd.DataFrame(pcaobj.components_).T
    loadings.columns = [str(x) for x in loadings.columns]
    loadings['x'] = loadings['0']
    loadings['y'] = loadings['1']
    try:
        loadings["gene_id"] = pcaobj.features
    except Exception:
        smllogger.warning("failed to set gene_id")
        raise

    try:
        loadings["gene_name"] = [pcaobj.labels[x] for x in loadings["gene_id"]]
    except Exception:
        smllogger.warning("failed to set gene_name")
        raise
    loadings_source = ColumnDataSource(loadings)
    plot_kwargs.update({'title': "Loadings"})
    loadings_fig = points(x='x', y='y', df=loadings_source,
                          **plot_kwargs)

    tooltips(loadings_fig, HoverTool, [('gene_id', '@gene_id'), ('gene_name', '@gene_name')])
    x_loadings_callback = CustomJS(args=dict(source=loadings_source),
                                   code="""pca_loadings(source, cb_obj, "x");""")
    y_loadings_callback = CustomJS(args=dict(source=loadings_source),
                                   code="""pca_loadings(source, cb_obj, "y");""")
    loadings_component_x = Select(title="PCA loading x", options=menulist, value=menulist[0],
                                  callback=x_loadings_callback)
    loadings_component_y = Select(title="PCA loading y", options=menulist, value=menulist[1],
                                  callback=y_loadings_callback)

    # Add taptool url if present
    if taptool_url:
        loadings_fig.add_tools(TapTool(callback=OpenURL(url=taptool_url)))

    # Detected genes, FPKM and TPM
    plot_kwargs.update({'title': 'Number of detected genes',
                        'xlabel': "Sample",
                        'ylabel': "Detected genes",
                        'x_range': list(pca_source.data["SM"])})
    n_genes_fig = dotplot(df=pca_source, x="SM", y="TPM", **plot_kwargs)
    tooltips(n_genes_fig, HoverTool, [('sample', '@SM'),
                                      ('# genes (TPM)', '@TPM'),
                                      ('# genes (FPKM)', '@FPKM')])

    buttons = toggle_buttons + [component_x, component_y] + [loadings_component_x, loadings_component_y]
    return {'pca': vform(*(buttons + [gridplot([[fig, loadings_fig, n_genes_fig]])]))}