Пример #1
0
def scrnaseq_results_plot_alignrseqc(df, **kwargs):
    """
    Plot scRNASeq QC summary.

    Builds a data table of alignment statistics plus seven dot plots
    (one per QC metric). All plots share the x range of the first plot.

    Args:
      df (DataFrame): data frame consisting of summary data from
        alignments and RSeQC. It must contain an "SM" column (used as
        index) and the metric columns referenced below.
      kwargs: keyword arguments to bokeh plot functions. They override
        the built-in defaults; per-plot settings (data source, tools,
        title, x/y columns, ranges, y axis label, colors) always take
        precedence over both.

    Returns:
      (dict): dictionary with a bokeh gridplot (key 'fig') and a
        table (key 'table')
    """
    df = df.set_index("SM")
    source = ColumnDataSource(df)
    columns = [
        TableColumn(field="SM", title="Sample"),
        TableColumn(field="Number_of_input_reads",
                    title="Number of input reads"),
        TableColumn(field="Uniquely_mapped_reads_PCT",
                    title="Uniquely mapped reads (%)"),
        TableColumn(field="Mismatch_rate_per_base,_PCT",
                    title="Mismatch rate per base (%)"),
        TableColumn(field="Insertion_rate_per_base",
                    title="Insertion rate per base (%)"),
        TableColumn(field="Deletion_rate_per_base",
                    title="Deletion rate per base (%)"),
        TableColumn(field="PCT_of_reads_unmapped",
                    title="Unmapped reads (%)"),
    ]
    table = DataTable(source=source, columns=columns,
                      editable=False, width=1000)

    # Defaults shared by every plot. Caller-supplied kwargs are merged on
    # top; previously they were silently discarded because the parameter
    # was rebound to this dict.
    plot_kwargs = {
        'plot_width': 400,
        'plot_height': 400,
        'title_text_font_size': "16pt",
        'x_axis_label': 'Sample',
        'x_axis_label_text_font_size': '12pt',
        'x_major_label_orientation': np.pi/3,
        'y_axis_label_text_font_size': '12pt',
        'y_major_label_orientation': np.pi/3,
    }
    plot_kwargs.update(kwargs)

    def _dotplot(title, y, **overrides):
        # Per-plot settings are applied last so that a caller kwarg can
        # never collide with them (no duplicate-keyword TypeError).
        opts = dict(plot_kwargs, **overrides)
        opts.update(df=source, tools=DEFAULT_TOOLS, title=title,
                    x="SM", y=y)
        return dotplot(**opts)

    # Input reads
    p1 = _dotplot("Number of input reads", "Number_of_input_reads",
                  x_range=list(df.index), y_axis_type="log",
                  y_axis_label='Reads')
    tooltips(p1, HoverTool, [('Sample', '@SM'),
                             ('Reads', '@Number_of_input_reads')])

    # Every remaining plot reuses p1's x range and is labelled 'Percent'.
    shared = {'x_range': p1.x_range, 'y_axis_label': 'Percent'}

    # Uniquely mapping
    p2 = _dotplot("Uniquely mapping reads", "Uniquely_mapped_reads_PCT",
                  y_range=[0, 100], **shared)
    tooltips(p2, HoverTool, [('Sample', '@SM'),
                             ('Pct_mapped', '@Uniquely_mapped_reads_PCT')])

    # Unmapped
    p3 = _dotplot("Unmapped reads", "PCT_of_reads_unmapped",
                  y_range=[0, 100], **shared)
    tooltips(p3, HoverTool, [('Sample', '@SM'),
                             ('Pct_unmapped', '@PCT_of_reads_unmapped')])

    # Mismatch/indel rate
    p4 = _dotplot(
        "Mismatch/indel rate",
        [
            "Mismatch_rate_per_base,_PCT",
            "Insertion_rate_per_base",
            "Deletion_rate_per_base",
        ],
        y_range=[0, 1], color=["blue", "red", "green"], **shared)
    tooltips(p4, HoverTool, [('Sample', '@SM'),
                             ('Mismatch rate per base',
                              '@Mismatch_rate_per_base,_PCT'),
                             ('Insertion rate per base',
                              '@Insertion_rate_per_base'),
                             ('Deletion rate per base',
                              '@Deletion_rate_per_base'), ])
    # Restrict box selection to the horizontal (sample) dimension.
    select_tool = p4.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Mismatch/indel sum
    p5 = _dotplot("Mismatch/indel sum", "mismatch_sum", **shared)
    tooltips(p5, HoverTool, [('Sample', '@SM'),
                             ('Mismatch/indel rate per base',
                              '@mismatch_sum'), ])
    select_tool = p5.select(dict(type=BoxSelectTool))
    select_tool.dimensions = ['width']

    # Tags mapping to exons
    p6 = _dotplot("Tags mapping to exons", "ExonMap_PCT", **shared)
    tooltips(p6, HoverTool, [('Sample', '@SM'),
                             ('ExonMap (%)', '@ExonMap_PCT'), ])

    # Fraction reads mapping to 10% right-most end
    p7 = _dotplot("Reads mapping to 3' end", "three_prime_map", **shared)
    tooltips(p7, HoverTool, [('Sample', '@SM'),
                             ("3' map", '@three_prime_map'), ])

    return {'fig': gridplot([[p1, p2, p3], [p4, p5, p6], [p7]]), 'table': table}
Пример #2
0
def plot_pca(pcaobjfile, pca_results_file=None, metadata=None, taptool_url=None, **kwargs):
    """Make PCA plot

    Builds three figures (sample scores, loadings, number of detected
    genes) together with the widgets that drive them.

    Args:
      pcaobjfile (str): file name containing pickled pca object
      pca_results_file (str): pca results file (csv). Despite the None
        default it is passed straight to ``pd.read_csv``, so in
        practice a file name must be supplied.
      metadata (str): metadata file name (csv); optional
      taptool_url (str): url prefix that is attached to taptool; typically a link to ensembl
      kwargs: the keys 'index_col' (default "SM"), 'color' (default
        'red') and 'size' (default 10) are read; no other key is used.

    Returns:
      dict: dictionary with the single key 'pca' pointing to the
        ``vform`` layout that stacks the toggle buttons and component
        selectors above a gridplot of the three figures.

    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file;
    # only pass pickles that come from a trusted source.
    with open(pcaobjfile, 'rb') as fh:
        pcaobj = pickle.load(fh)

    md = pd.read_csv(metadata, index_col=0) if metadata is not None else None

    df_pca = pd.read_csv(pca_results_file, index_col=kwargs.get('index_col', "SM"))
    n_samples = df_pca.shape[0]
    df_pca['color'] = [kwargs.get('color', 'red')] * n_samples
    # Start out showing the first two components; column names are the
    # component indices as strings.
    df_pca['x'] = df_pca['0']
    df_pca['y'] = df_pca['1']
    df_pca['size'] = [kwargs.get('size', 10)] * n_samples
    pca_source = ColumnDataSource(df_pca)
    # NOTE(review): cmap is never used below; the call is kept only
    # because colorbrewer is defined outside this view — confirm it has
    # no side effects and drop it.
    cmap = colorbrewer(datalen = df_pca.shape[0])

    callback = CustomJS(args=dict(source=pca_source),
                        code="""pca_callback(source, cb_obj, "SM");""")
    xcallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "x");""")
    ycallback = CustomJS(args=dict(source=pca_source),
                         code="""pca_component(source, cb_obj, "y");""")

    if md is not None:
        # Waiting for callbacks to be implemented upstream in bokeh
        # rbg = RadioButtonGroup(labels=list(md.columns),
        #                        callback=callback)
        toggle_buttons = [Toggle(label=x, callback=callback) for x in list(md.columns) + ["TPM", "FPKM"]]
    else:
        toggle_buttons = []

    # Columns whose name starts with a digit hold component scores;
    # present them 1-based in the menus.
    pca_components = sorted(int(x) + 1 for x in pca_source.column_names if re.match(r"\d+", x))
    # The same menu entries serve both the score and the loading selectors.
    menulist = ["{} ({:.2f}%)".format(x, 100.0 * p) for x, p in zip(pca_components, pcaobj.explained_variance_ratio_)]
    component_x = Select(title = "PCA component x", options = menulist, value=menulist[0],
                         callback=xcallback)
    component_y = Select(title = "PCA component y", options = menulist, value=menulist[1],
                         callback=ycallback)

    # Make the pca plot. The plot options live in their own dict rather
    # than rebinding the kwargs parameter.
    plot_kwargs = {'plot_width': 400, 'plot_height': 400,
                   'title_text_font_size': "12pt",
                   'title': "Principal component analysis",
                   'tools': TOOLS,
                   'x_axis_label_text_font_size': '10pt',
                   'x_major_label_orientation': np.pi/3,
                   'y_axis_label_text_font_size': '10pt',
                   'y_major_label_orientation': np.pi/3,
    }
    fig = points('x', 'y', df=pca_source, color='color', size='size', alpha=0.7, **plot_kwargs)
    tooltiplist = [("sample", "@SM")] if "SM" in pca_source.column_names else []
    if md is not None:
        tooltiplist = tooltiplist + [(str(x), "@{}".format(x)) for x
                                     in md.columns] + \
        [("Detected genes (TPM)", "@TPM"), ("Detected genes (FPKM)", "@FPKM")]
    tooltips(fig, HoverTool, tooltiplist)

    # Loadings
    loadings = pd.DataFrame(pcaobj.components_).T
    loadings.columns = [str(x) for x in loadings.columns]
    loadings['x'] = loadings['0']
    loadings['y'] = loadings['1']
    # NOTE(review): smllogger.warn is kept as-is; if smllogger is a
    # standard logging.Logger, .warning is the non-deprecated spelling.
    try:
        loadings["gene_id"] = pcaobj.features
    except Exception:
        smllogger.warn("failed to set gene_id")
        raise

    try:
        loadings["gene_name"] = [pcaobj.labels[x] for x in loadings["gene_id"]]
    except Exception:
        smllogger.warn("failed to set gene_name")
        raise
    loadings_source = ColumnDataSource(loadings)
    plot_kwargs.update({'title': "Loadings"})
    loadings_fig = points(x='x', y='y', df=loadings_source,
                          **plot_kwargs)

    tooltips(loadings_fig, HoverTool, [('gene_id', '@gene_id'), ('gene_name', '@gene_name')])
    x_loadings_callback = CustomJS(args=dict(source=loadings_source),
                                   code="""pca_loadings(source, cb_obj, "x");""")
    y_loadings_callback = CustomJS(args=dict(source=loadings_source),
                                   code="""pca_loadings(source, cb_obj, "y");""")
    loadings_component_x = Select(title = "PCA loading x", options = menulist, value=menulist[0],
                                  callback=x_loadings_callback)
    loadings_component_y = Select(title = "PCA loading y", options = menulist, value=menulist[1],
                                  callback=y_loadings_callback)

    # Add taptool url if present
    if taptool_url:
        loadings_fig.add_tools(TapTool(callback=OpenURL(url=taptool_url)))

    # Detected genes, FPKM and TPM
    # NOTE(review): "SM" is looked up unconditionally here, so a custom
    # 'index_col' that renames the index will raise KeyError — confirm
    # whether that option is meant to be supported.
    # NOTE(review): 'xlabel'/'ylabel' are passed through unchanged;
    # verify dotplot accepts these option names.
    plot_kwargs.update({'title': 'Number of detected genes',
                        'xlabel': "Sample",
                        'ylabel': "Detected genes",
                        'x_range': list(pca_source.data["SM"])})
    n_genes_fig = dotplot(df=pca_source, x="SM", y="TPM", **plot_kwargs)
    tooltips(n_genes_fig, HoverTool, [('sample', '@SM'),
                                      ('# genes (TPM)', '@TPM'),
                                      ('# genes (FPKM)', '@FPKM')])

    buttons = toggle_buttons + [component_x, component_y] + [loadings_component_x, loadings_component_y]
    return {'pca' : vform(*(buttons + [gridplot([[fig, loadings_fig, n_genes_fig]])]))}