Exemplo n.º 1
0
def plot_revigo(
    rev,
    outline=2,
    expand_points=(1.05, 1.2),
    figure_size=(8, 8),
    font_size=8,
    point_size=3,
    point_alpha=0.7,
    palette='RdPu',
    dispensability_cutoff=1.,
    show_all_labels=False,
    text_column='name',
    term_size_limit=None,
):
    """Build a labelled scatter plot from a REVIGO-style table.

    Points are drawn at (``plot_X``, ``plot_Y``), filled by ``neglog10`` and
    sized by ``frequency`` (log10-transformed size scale). Text labels are
    taken from ``text_column`` and drawn with a white outline.

    Parameters
    ----------
    rev : DataFrame-like
        Must provide the columns ``plot_X``, ``plot_Y``, ``neglog10``,
        ``frequency`` and ``text_column``. When ``show_all_labels`` is false
        it must also provide ``eliminated`` and ``dispensability`` (and
        ``term_size`` if ``term_size_limit`` is given).
    outline : number
        Line width of the white stroke drawn around each text label.
    expand_points : tuple
        Passed through as ``expand_points`` in the ``adjust_text`` options.
    figure_size : tuple
        Passed to ``theme(figure_size=...)``.
    font_size : number
        Size of the label text.
    point_size : number
        Currently unused: point size is mapped to the ``frequency`` column.
        Kept for interface compatibility.
    point_alpha : float
        Alpha of the points.
    palette : str
        Palette name given to ``scale_fill_distiller``.
    dispensability_cutoff : float
        Only rows with ``dispensability`` strictly below this value are
        labelled (ignored when ``show_all_labels`` is true).
    show_all_labels : bool
        Label every row instead of the filtered subset.
    text_column : str
        Column holding the label text.
    term_size_limit : number or None
        When given, only rows with ``term_size`` strictly below it are
        labelled (ignored when ``show_all_labels`` is true).

    Returns
    -------
    The assembled plotnine ``ggplot`` object.
    """

    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # White stroke behind the glyphs keeps labels legible on top of points.
    # The stroke width is driven by ``outline`` (previously hard-coded to 2,
    # which silently ignored the parameter).
    pe = [
        path_effects.Stroke(linewidth=outline, foreground='white'),
        path_effects.Normal()
    ]
    if not show_all_labels:
        # Label only rows that were not eliminated and are below the
        # dispensability cutoff.
        lbl_df = rev[(rev.eliminated == 0)
                     & (rev.dispensability < dispensability_cutoff)]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]
    else:
        lbl_df = rev

    g = (p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev) +
         p9.geom_point(p9.aes(fill='neglog10', size='frequency'),
                       color='black',
                       alpha=point_alpha) +
         p9.geom_text(p9.aes(label=text_column),
                      data=lbl_df,
                      size=font_size,
                      adjust_text={
                          'expand_points': expand_points,
                          'arrowprops': {
                              'arrowstyle': '-'
                          },
                          # Coordinates of *all* points (not only labelled
                          # ones) are handed to the label adjustment.
                          'x': rev.plot_X.values,
                          'y': rev.plot_Y.values
                      },
                      path_effects=pe) + p9.theme_bw() +
         p9.scale_fill_distiller(type='seq', palette=palette, direction=1) +
         p9.labs(x='Semantic similarity space',
                 y='',
                 fill='-log10(adj. p-value)',
                 size='Term frequency') +
         p9.scale_size_continuous(range=(2, 7), trans='log10') +
         p9.theme(figure_size=figure_size,
                  axis_text_x=p9.element_blank(),
                  axis_text_y=p9.element_blank(),
                  axis_ticks=p9.element_blank()))

    return g
Exemplo n.º 2
0
def test_text_aesthetics():
    """Stack several geom_text layers, each exercising a different aesthetic.

    The assembled plot is asserted equal to the string 'text_aesthetics'
    (presumably resolved by the test suite's plot-comparison ``==`` -- confirm
    against the test helpers).
    """
    # One text layer per aesthetic, each shifted along x.
    text_layers = [
        geom_text(aes('x', label='label'), size=15, ha='left'),
        geom_text(aes('x+1', angle='angle'),
                  size=15, va='top', show_legend=False),
        geom_text(aes('x+2', label='label', alpha='z'),
                  size=15, show_legend=False),
        geom_text(aes('x+3', color='factor(z)'),
                  size=15, show_legend=False),
        geom_text(aes('x+5', size='z'), ha='right', show_legend=False),
    ]
    scales = [
        scale_size_continuous(range=(12, 30)),
        scale_y_continuous(limits=(-0.5, n - 0.5)),
    ]

    # Add the components in the same order as a single chained expression.
    p = ggplot(df, aes(y='y', label='label'))
    for component in text_layers + scales:
        p = p + component

    assert p == 'text_aesthetics'
Exemplo n.º 3
0
def test_text_aesthetics():
    """Build a plot with one geom_text layer per text aesthetic.

    Layers are added one at a time; the finished plot is asserted equal to
    the string 'text_aesthetics' (presumably the test suite's
    plot-comparison ``==`` -- confirm against the test helpers).
    """
    # Keyword arguments shared by the fixed-size layers without a legend.
    quiet = dict(size=15, show_legend=False)

    p = ggplot(df, aes(y='y', label='label'))
    p = p + geom_text(aes('x', label='label'), size=15, ha='left')
    p = p + geom_text(aes('x+1', angle='angle'), va='top', **quiet)
    p = p + geom_text(aes('x+2', label='label', alpha='z'), **quiet)
    p = p + geom_text(aes('x+3', color='factor(z)'), **quiet)
    # Size is a mapped aesthetic on this layer, so no fixed size is given.
    p = p + geom_text(aes('x+5', size='z'), ha='right', show_legend=False)
    p = p + scale_size_continuous(range=(12, 30))
    p = p + scale_y_continuous(limits=(-0.5, n-0.5))

    assert p == 'text_aesthetics'
Exemplo n.º 4
0
def ggpca(x,
          y=None,
          center='col',
          scale='none',
          rlab=False,
          clab=False,
          cshow=None,
          rsize=4,
          csize=2,
          lsize=10,
          lnudge=0.03,
          ralpha=0.6,
          calpha=1.0,
          clightalpha=0,
          rname='sample',
          cname='variable',
          lname='',
          grid=True,
          printit=False,
          xsvd=None,
          invert1=False,
          invert2=False,
          colscale=None,
          **kwargs):
    """Build a PCA biplot (rows and, optionally, columns of ``x``) as a plot.

    Columns of ``x`` containing any missing value are dropped first. Row
    points come from the first two columns of ``xsvd[0]`` and column points
    from the first two rows of ``xsvd[2]``, each rescaled by the range of its
    first component.

    Parameters
    ----------
    x : DataFrame
        Data matrix; ``x.index`` / ``x.columns`` provide default labels.
    y : Series-like or None
        Optional per-row class, looked up with ``y.loc[x.index]``. When
        omitted every row gets the class ``rname``.
    center, scale
        Forwarded to ``svdForPca`` when ``xsvd`` is not supplied.
    rlab, clab : bool or labels
        ``True`` labels rows/columns with ``x.index`` / ``x.columns``,
        ``False`` uses empty labels; any non-bool value is used as-is as the
        label column.
    cshow : int or None
        Number of column points to show (those with the largest
        ``PC1**2 + PC2**2``). ``None`` shows all; ``0`` shows none.
    rsize, csize : number
        Point size values for rows / columns; their sorted pair is also the
        range of the size scale.
    lsize, lnudge : number
        Text size and vertical nudge of the labels.
    ralpha, calpha : float
        Alpha values for rows / columns; their sorted pair is the range of
        the alpha scale (widened to include ``clightalpha`` when used).
    clightalpha : float
        When positive and ``cshow`` is less than the number of columns, the
        non-selected columns are kept with an empty label and this alpha
        instead of being removed.
    rname, cname : str
        Class names assigned to rows (when ``y`` is None) and columns.
    lname : str
        Name given to the color scale.
    grid : bool
        When false, panel grid and background are blanked.
    printit : bool
        When true, the plot is printed before being returned.
    xsvd : sequence or None
        Precomputed decomposition; indexed as ``xsvd[0]`` (DataFrame of row
        scores), ``xsvd[1]`` (values whose squares give explained variance)
        and ``xsvd[2]`` (DataFrame of loadings, components in rows).
        Presumably (U, d, Vt) from ``svdForPca`` -- confirm against that
        helper.
    invert1, invert2 : bool
        Negate PC1 / PC2.
    colscale : list or None
        Colors for the manual color scale. When ``None`` and there are fewer
        than 8 classes a default palette is chosen.
    **kwargs
        Accepted but not used.

    Returns
    -------
    The assembled plot object.
    """
    # Drop NaN-containing columns *before* deriving defaults from ``x``.
    # Previously ``clab``/``cshow`` were taken from the unfiltered frame, so
    # ``clab=True`` on data with NaN columns produced a label array longer
    # than the loadings and the DataFrame construction below failed.
    x = x.loc[:, x.isnull().sum(axis=0) == 0]
    if cshow is None:
        cshow = x.shape[1]
    if isinstance(rlab, bool):
        rlab = x.index if rlab else ''
    if isinstance(clab, bool):
        clab = x.columns if clab else ''
    if xsvd is None:
        xsvd = svdForPca(x, center, scale)
    # Scale factors: range of the first component for rows and for columns.
    rsf = np.max(xsvd[0].iloc[:, 0]) - np.min(xsvd[0].iloc[:, 0])
    csf = np.max(xsvd[2].iloc[0, :]) - np.min(xsvd[2].iloc[0, :])
    sizeRange = sorted([csize, rsize])
    alphaRange = sorted([calpha, ralpha])
    ggd = pd.DataFrame({
        'PC1': xsvd[0].iloc[:, 0] / rsf,
        'PC2': xsvd[0].iloc[:, 1] / rsf,
        'label': rlab,
        'size': rsize,
        'alpha': ralpha
    })
    cclass = []
    if cshow > 0:
        cdata = pd.DataFrame({
            'PC1': xsvd[2].iloc[0, :] / csf,
            'PC2': xsvd[2].iloc[1, :] / csf,
            'label': clab,
            'size': csize,
            'alpha': calpha
        })
        if cshow < x.shape[1]:
            # Keep the ``cshow`` columns farthest from the origin.
            cscores = cdata['PC1']**2 + cdata['PC2']**2
            keep = cscores.sort_values(ascending=False).head(cshow).index
            if clightalpha > 0:
                # Retain the remaining columns, unlabelled and faded.
                faded = ~cdata.index.isin(keep)
                cdata.loc[faded, 'label'] = ''
                cdata.loc[faded, 'alpha'] = clightalpha
                alphaRange = [
                    np.min([alphaRange[0], clightalpha]),
                    np.max([alphaRange[1], clightalpha])
                ]
            else:
                cdata = cdata.loc[cdata.index.isin(keep)]
        # Column points go first so the class list below lines up.
        ggd = pd.concat([cdata, ggd])
        cclass = [cname] * cdata.shape[0]
    if invert1:
        ggd['PC1'] = -ggd['PC1']
    if invert2:
        ggd['PC2'] = -ggd['PC2']
    if y is not None:
        ggd['class'] = cclass + list(y.loc[x.index])
    else:
        ggd['class'] = cclass + ([rname] * x.shape[0])
    ggo = gg.ggplot(
        ggd,
        gg.aes(x='PC1',
               y='PC2',
               color='class',
               size='size',
               alpha='alpha',
               label='label'))
    ggo += gg.geom_hline(yintercept=0, color='lightgray')
    ggo += gg.geom_vline(xintercept=0, color='lightgray')
    ggo += gg.geom_point()
    ggo += gg.theme_bw()
    ggo += gg.geom_text(nudge_y=lnudge, size=lsize, show_legend=False)
    n_classes = len(ggd['class'].unique())
    if colscale is None and n_classes < 8:
        colscale = [
            'darkslategray', 'goldenrod', 'lightseagreen', 'orangered',
            'dodgerblue', 'darkorchid'
        ]
        # Last class always gets gray; small class counts get fixed palettes.
        colscale = colscale[0:(n_classes - 1)] + ['gray']
        if len(colscale) == 2 and cshow > 0:
            colscale = ['black', 'darkgray']
        if len(colscale) == 2 and cshow == 0:
            colscale = ['black', 'red']
        if len(colscale) == 3:
            colscale = ['black', 'red', 'darkgray']
    # NOTE(review): with 8 or more classes and no user palette, ``colscale``
    # is still None here and is passed as ``values=None`` -- confirm whether
    # the plotting library accepts that.
    ggo += gg.scale_color_manual(values=colscale, name=lname)
    ggo += gg.scale_size_continuous(guide=False, range=sizeRange)
    ggo += gg.scale_alpha_continuous(guide=False, range=alphaRange)
    # Axis labels report each component's share of the summed squares of
    # ``xsvd[1]``.
    total_sq = (xsvd[1]**2).sum()
    ggo += gg.xlab('PC1 (' +
                   str(np.round(100 * xsvd[1][0]**2 / total_sq, 1)) +
                   '% explained var.)')
    ggo += gg.ylab('PC2 (' +
                   str(np.round(100 * xsvd[1][1]**2 / total_sq, 1)) +
                   '% explained var.)')
    if not grid:
        ggo += gg.theme(panel_grid_minor=gg.element_blank(),
                        panel_grid_major=gg.element_blank(),
                        panel_background=gg.element_blank())
    ggo += gg.theme(axis_ticks=gg.element_blank(),
                    axis_text_x=gg.element_blank(),
                    axis_text_y=gg.element_blank())
    if printit:
        print(ggo)
    return ggo

# Tag used as the prefix of the output file names for this figure.
dataset = "dmso_treated"

# Scatter plot of the embedding: fill by clone type, shape by batch,
# point size by cell count.
umap_resistant_type_gg = (
    gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
    + gg.geom_point(
        gg.aes(
            fill="Metadata_clone_type",
            shape="Metadata_batch",
            size="Metadata_cell_count",
        ),
        color="black",
        alpha=0.6,
    )
    + gg.theme_bw()
    + gg.xlab("UMAP (X)")
    + gg.ylab("UMAP (Y)")
    + gg.ggtitle("DMSO treated samples")
    + gg.scale_shape_manual(name="Batch", values=[".", "+", "x"])
    + gg.scale_fill_manual(name="Clone type", values=["#1F8AA5", "#E98831"])
    + gg.scale_size_continuous(name="Cell count")
    + gg.theme(
        strip_text=gg.element_text(size=6, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
)

# Output path without extension; one file is written per configured extension.
file = os.path.join("figures", "umap", f"{dataset}_umap_resistant_type")

for extension in save_file_extensions:
    umap_resistant_type_gg.save(
        filename=f"{file}{extension}",
        height=3,
        width=3.5,
        dpi=400,
    )

# Bare expression so the notebook cell displays the figure.
umap_resistant_type_gg


# In[10]: