示例#1
0
def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Scatter a method's real test statistic against its percentile rank.

    Rows whose percentile rank exceeds 0.9 are redrawn in red and labelled
    with their index value. The figure is printed and then saved as SVG.

    Args:
        summary_df: DataFrame with a "Rank (simulated)" column and the column
            named ``method_stats_dict[method_name] + " (Real)"``.
        method_name: Key into the module-level ``method_stats_dict``; also
            used in the plot title.
        x_label: Text for the x axis.
        output_figure_filename: Destination handed to ``fig.save``.
    """
    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[
                method_stats_dict[method_name] + " (Real)"
            ].values,
            "Percentile rank": summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    # Compute the highlight mask once and pair it positionally with the index.
    # A per-row `.loc` lookup is slower and breaks when an index label is
    # duplicated (the lookup then returns a Series, not a scalar).
    is_top_ranked = plot_df["Percentile rank"] > 0.9
    point_labels = [
        label if flagged else ""
        for label, flagged in zip(plot_df.index, is_top_ranked)
    ]

    fig = pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
    fig += pn.geom_point()
    fig += pn.geom_point(
        plot_df[is_top_ranked],
        pn.aes(x="Test statistic", y="Percentile rank"),
        color="red",
    )
    fig += pn.geom_text(
        pn.aes(label=point_labels),
        ha="left",
        va="top",
        size=5,
    )
    fig += pn.labs(
        x=x_label,
        y="Percentile of ranking",
        title=f"{method_name} pathway statistics vs ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )

    print(fig)

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
示例#2
0
def plot_fitting(x, y, resonance_frequency, parameter):
    """ Plots the phase response and the corresponding fit of the harmonic damped oscillator.

    The input series are not modified: renamed copies are used for plotting.

    Args:
        x (`float array`):                       X coordinates (frequency in kHz)
        y (`float array`):                       Y coordinates (phase in radians)
        resonance_frequency (`float array`):     Resonance frequency given by the fit of x and y
        parameter (`float array`):               Others parameters of function fit (Q factor, offset, linear background)

    Returns:
        p (`ggplot object`):                     Returns a ggplot object
    """

    # `rename` returns a new Series, so the caller's x / y keep their names
    # (assigning to `.name` would silently relabel the caller's objects).
    y_fit = fit_function(x, resonance_frequency, parameter[0], parameter[1],
                         parameter[2]).rename('Phase fit')
    frequency = x.rename('Frequency (kHz)')
    phase = y.rename('Phase (rad)')
    data = concat([frequency, phase, y_fit], axis=1)
    col_names = list(data)

    # Plot data: raw phase as points, fitted phase as a red line
    p = ggplot(aes(x=col_names[0], y=col_names[1]), data=data) + \
        geom_point() + \
        geom_line(aes(x=col_names[0], y=col_names[2]),  color='red', size=0.5) + \
        theme_seaborn(style='ticks', context='talk', font_scale=0.75) + \
        theme(figure_size=(15, 7), strip_background=element_rect(fill='white'), axis_line_x=element_line(color='black'),
              axis_line_y=element_line(color='black'), legend_key=element_rect(fill='white', color='white'))
    return p
示例#3
0
def plot_paired_ranking(
    method1_summary_df,
    method2_summary_df,
    method1_name,
    method2_name,
    output_figure_filename,
):
    """Scatter the simulated percentile of one method against another.

    The two summary frames are joined on their index, each method's
    "Percentile (simulated)" column is plotted on one axis, and the figure
    is saved as SVG and then printed.

    Args:
        method1_summary_df: Summary frame for the first method (x axis).
        method2_summary_df: Summary frame for the second method (y axis).
        method1_name: Name of the first method; used as column suffix and label.
        method2_name: Name of the second method; used as column suffix and label.
        output_figure_filename: Destination handed to ``fig.save``.
    """
    suffix_1 = f"_{method1_name}"
    suffix_2 = f"_{method2_name}"

    # Index-on-index join keeps the rows of both methods aligned
    joined = method1_summary_df.merge(
        method2_summary_df,
        left_index=True,
        right_index=True,
        suffixes=[suffix_1, suffix_2],
    )

    rankings = pd.DataFrame(
        data={
            "Method1 ranking": joined["Percentile (simulated)" + suffix_1].values,
            "Method2 ranking": joined["Percentile (simulated)" + suffix_2].values,
        },
        index=joined.index,
    )

    axis_labels = pn.labs(
        x=f"{method1_name} pathway ranking",
        y=f"{method2_name} pathway ranking",
        title=f"{method1_name} vs {method2_name} pathway ranking",
    )

    fig = (
        pn.ggplot(rankings, pn.aes(x="Method1 ranking", y="Method2 ranking"))
        + pn.geom_point()
        + axis_labels
        + pn.theme_bw()
        + pn.theme(
            legend_title_align="center",
            plot_background=pn.element_rect(fill="white"),
            legend_key=pn.element_rect(fill="white", colour="white"),
            legend_title=pn.element_text(family="sans-serif", size=15),
            legend_text=pn.element_text(family="sans-serif", size=12),
            plot_title=pn.element_text(family="sans-serif", size=15),
            axis_text=pn.element_text(family="sans-serif", size=12),
            axis_title=pn.element_text(family="sans-serif", size=15),
        )
    )

    # Write to disk first, then echo the figure
    save_options = {
        "format": "svg",
        "bbox_inches": "tight",
        "transparent": True,
        "pad_inches": 0,
        "dpi": 300,
    }
    fig.save(output_figure_filename, **save_options)
    print(fig)
示例#4
0
 def __init__(self, base_size=11, base_family='DejaVu Sans'):
     """Initialise via ``theme_light`` and overlay grey ticks, border and facet strips."""
     theme_light.__init__(self, base_size, base_family)

     light_grey = '#DDDDDD'
     dark_grey = '#838383'

     overrides = theme(
         axis_ticks=element_line(color=light_grey, size=0.5),
         panel_border=element_rect(fill='None', color=dark_grey, size=1),
         strip_background=element_rect(fill=light_grey, color=dark_grey,
                                       size=1),
         strip_text_x=element_text(color='black'),
         strip_text_y=element_text(color='black', angle=-90),
     )
     # Merge the overrides into this theme instance itself
     self.add_theme(overrides, inplace=True)
示例#5
0
def plot_response_shift(x, y, resonance_frequency_without, parameter_without,
                        xx, yy, resonance_frequency_with, parameter):
    """ Plots the phase response of pre start data without and with cell attached to cantilever with the
    respective function fit.

    The input series are not modified: renamed copies are used for plotting.

    Args:
        x (`float array`):                               X coordinates w/o cell (frequency in kHz)
        y (`float array`):                               Y coordinates w/o cell (phase in radians)
        resonance_frequency_without (`float array`):     Resonance frequency given by the fit of x and y  w/o cell
        parameter_without (`float array`):               Others parameters of function fit (Q factor, offset, linear
                                                         background) w/o cell
        xx (`float array`):                              X coordinates w/ cell(frequency in kHz)
        yy (`float array`):                              Y coordinates w/ cell (phase in radians)
        resonance_frequency_with (`float array`):        Resonance frequency given by the fit of x and y w/ cell
        parameter (`float array`):                       Others parameters of function fit (Q factor, offset, linear
                                                         background) w/ cell

    Returns:
        p (`ggplot object`):                             Returns a ggplot object
    """

    # `rename` yields new Series, leaving the caller's objects untouched
    y_fit_without = fit_function(x, resonance_frequency_without,
                                 parameter_without[0], parameter_without[1],
                                 parameter_without[2]).rename('Phase fit w/o cell att.')
    y_fit_with = fit_function(xx, resonance_frequency_with, parameter[0],
                              parameter[1], parameter[2]).rename('Phase fit w cell att.')
    data = concat([x.rename('Frequency without (kHz)'),
                   y.rename('Raw phase w/o cell att.'),
                   y_fit_without,
                   xx.rename('Frequency with (kHz)'),
                   yy.rename('Raw phase w cell att.'),
                   y_fit_with], axis=1)

    # Long format for the fitted curves. melt keys every row on the "with"
    # frequency, so the rows of the "w/o" curve get their own x values
    # written back afterwards.
    df = melt(data,
              id_vars=['Frequency with (kHz)'],
              value_vars=['Phase fit w cell att.', 'Phase fit w/o cell att.'])
    df.loc[df['variable'] == 'Phase fit w/o cell att.',
           'Frequency with (kHz)'] = x.values
    # Same reshaping for the raw measurements
    df2 = melt(data,
               id_vars=['Frequency with (kHz)'],
               value_vars=['Raw phase w cell att.', 'Raw phase w/o cell att.'])
    df2.loc[df2['variable'] == 'Raw phase w/o cell att.',
            'Frequency with (kHz)'] = x.values
    # Plot data: raw phases as points (fill legend), fits as lines (color legend)
    p = ggplot(data=df) + \
        geom_point(aes(x="Frequency with (kHz)", y='value', fill='variable'), data=df2, alpha=0.6) + \
        geom_line(aes(x="Frequency with (kHz)", y='value', color='variable')) + \
        xlab('Frequency (kHz)') + \
        ylab('Phase (rad)') + \
        labs(fill='Raw data', color='Function fits') + \
        theme_seaborn(style='ticks', context='talk', font_scale=0.75) + \
        theme(figure_size=(15, 7), strip_background=element_rect(fill='white'), axis_line_x=element_line(color='black'),
              axis_line_y=element_line(color='black'), legend_key=element_rect(fill='white', color='white'))
    return p
示例#6
0
def theme_energinet() -> p9.themes.theme:
    """Build a plain Energinet theme.

    White panel and legend backgrounds, black axis lines, the font family
    supplied by ``endktheme.style.font_family()``, and no plot background,
    legend keys, grid lines or axis ticks.
    """
    settings = {
        "text": p9.element_text(family=endktheme.style.font_family()),
        "axis_line": p9.element_line(color="black"),
        "plot_background": p9.element_blank(),
        "panel_background": p9.element_rect(fill="white"),
        "legend_background": p9.element_rect(fill="white"),
        "legend_key": p9.element_blank(),
        "panel_grid": p9.element_blank(),
        "axis_ticks": p9.element_blank(),
    }
    return p9.theme(**settings)
示例#7
0
 def __init__(self, base_size=11, base_family='DejaVu Sans'):
     """Initialise via ``theme_light``, then add grey ticks, border and strips and blank legend keys."""
     theme_light.__init__(self, base_size, base_family)

     light_grey = '#DDDDDD'
     dark_grey = '#838383'

     overrides = theme(
         axis_ticks=element_line(color=light_grey, size=0.5),
         panel_border=element_rect(fill='None', color=dark_grey, size=1),
         strip_background=element_rect(fill=light_grey, color=dark_grey,
                                       size=1),
         strip_text_x=element_text(color='black'),
         strip_text_y=element_text(color='black', angle=-90),
         legend_key=element_blank(),
     )
     # Merge the overrides into this theme instance itself
     self.add_theme(overrides, inplace=True)
示例#8
0
文件: plot.py 项目: NPSDC/qb
 def __init__(self, base_size=11, base_family="DejaVu Sans"):
     """Initialise via ``theme_light``, then add grey ticks, border and strips and blank legend keys."""
     theme_light.__init__(self, base_size, base_family)

     tick_and_strip_grey = "#DDDDDD"
     outline_grey = "#838383"

     black_text = {"color": "black"}
     overrides = theme(
         axis_ticks=element_line(color=tick_and_strip_grey, size=0.5),
         panel_border=element_rect(fill="None", color=outline_grey, size=1),
         strip_background=element_rect(
             fill=tick_and_strip_grey, color=outline_grey, size=1
         ),
         strip_text_x=element_text(**black_text),
         strip_text_y=element_text(angle=-90, **black_text),
         legend_key=element_blank(),
     )
     # Apply the overrides to this instance rather than to a copy
     self.add_theme(overrides, inplace=True)
示例#9
0
def plot_bargraph(count_plot_df, plot_df):
    """
    Build a horizontal bar chart of lemma counts, one facet per repository.

    Arguments:
        count_plot_df - dataframe with "lemma", "count" and "repository"
            columns; "count" is cast to int before plotting
        plot_df - dataframe with "odds_ratio" and "lemma" columns; lemmas are
            ordered on the axis by ascending odds ratio

    Returns the plotnine ggplot object.
    """
    counts = count_plot_df.astype({"count": int})
    lemma_order = plot_df.sort_values("odds_ratio",
                                      ascending=True).lemma.tolist()

    bars = p9.geom_col(position=p9.position_dodge(width=0.5), fill="#253494")
    facets = p9.facet_wrap("repository", scales='free_x')
    lemma_axis = p9.scale_x_discrete(limits=(lemma_order))
    count_axis = p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
    base_theme = p9.theme_seaborn(context='paper',
                                  style="ticks",
                                  font="Arial",
                                  font_scale=0.95)
    sizing = p9.theme(
        # 640 x 480
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )

    graph = (p9.ggplot(counts, p9.aes(x="lemma", y="count"))
             + bars
             + p9.coord_flip()
             + facets
             + lemma_axis
             + count_axis
             + p9.labs(x=None)
             + base_theme
             + sizing)
    return graph
示例#10
0
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language for one CLWE.

    Reads the CSV at ``data_file(table)``, keeps the rows whose ``clwe``
    column equals ``clwe``, plots accuracy by language with one bar per
    ``refine`` level, and writes the plot to ``output_file(output)``.

    Args:
        clwe: Value of the ``clwe`` column to keep.
        table: Name passed to ``data_file`` to locate the CSV.
        output: Name passed to ``output_file`` to locate the saved plot.
        ylim: y-axis limits handed to ``coord_cartesian``.
    """
    refine_levels = ['Original', '+retrofit', '+synthetic']
    language_levels = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    scores = pd.read_csv(data_file(table))
    scores = scores[scores.clwe == clwe]
    # Fixed category order controls bar and axis ordering
    scores = scores.assign(
        refine=pd.Categorical(scores['refine'], refine_levels),
        language=pd.Categorical(scores['language'], language_levels),
    )

    chart = (
        p9.ggplot(scores, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + p9.theme(
            plot_background=p9.element_rect(fill='white'),
            panel_grid_major_y=p9.element_line(),
            axis_text_x=p9.element_text(margin={'t': 10}),
            axis_text_y=p9.element_text(margin={'r': 8}),
            legend_position=(.7, .9),
            legend_direction='horizontal',
            legend_title=p9.element_blank(),
            legend_text=p9.element_text(size=FONT_SIZE),
            legend_box_margin=0,
            figure_size=(12, 3),
        )
    )
    chart.save(filename=output_file(output))
示例#11
0
def theme_cognoma(fontsize_mult=1):
    """Return a ``theme_bw``-based theme with dark-grey lines and scaled fonts.

    Args:
        fontsize_mult: Multiplier applied to the base size (14), the axis
            text size (12) and the axis title size (13).
    """
    dark_grey = "#4d4d4d"
    axis_title_size = 13 * fontsize_mult

    base = gg.theme_bw(base_size=14 * fontsize_mult)
    overrides = gg.theme(
        line=gg.element_line(color=dark_grey),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark_grey),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark_grey),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark_grey),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark_grey),
        axis_title_x=gg.element_text(size=axis_title_size, color=dark_grey),
        axis_title_y=gg.element_text(size=axis_title_size, color=dark_grey),
    )
    return (base + overrides)
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Plot the density of pairwise correlations, filled by replicate status.

    Args:
        df: DataFrame with "pairwise_correlation" and "replicate_info" columns.
        batch: Batch identifier, shown in the title.
        plate: Plate identifier, shown in the title.
        output_file_base: If truthy, the plot is passed to ``save_figure``
            together with this base name.
        output_file_extensions: Extensions forwarded to ``save_figure``.
            ``None`` (the default) means ``[".png", ".pdf", ".svg"]``.
        dpi: Forwarded to ``save_figure``.
        height: Forwarded to ``save_figure``.
        width: Forwarded to ``save_figure``.

    Returns:
        The plotnine ggplot object.
    """
    # A list literal as the default would be shared between calls; build a
    # fresh one per call instead.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3) + gg.scale_fill_manual(
            name="Replicate",
            labels={
                "True": "True",
                "False": "False"
            },
            values=["#B99638", "#2DB898"],
        ) + gg.xlab("Pearson Correlation") + gg.ylab("Density") +
        gg.ggtitle("{}: {}".format(batch, plate)) + gg.theme_bw() + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        ))

    if output_file_base:
        save_figure(density_gg, output_file_base, output_file_extensions, dpi,
                    height, width)

    return density_gg
示例#13
0
 def __init__(self):
     """Initialise via ``theme_minimal`` in Open Sans, then adjust axis titles and the panel border."""
     pn.theme_minimal.__init__(self, base_family='Open Sans')

     overrides = pn.theme(
         axis_title=pn.element_text(size=10),
         axis_title_y=pn.element_text(margin={'r': 12}),
         panel_border=pn.element_rect(color='gainsboro', size=1,
                                      fill=None),
     )
     # Apply the overrides to this instance rather than to a copy
     self.add_theme(overrides, inplace=True)
示例#14
0
def plot_categ_spatial(mod,
                       adata,
                       sample_col,
                       color,
                       n_columns=2,
                       figure_size=(24, 5.7),
                       point_size=0.8,
                       text_size=9):
    """Plot spots at their image coordinates, coloured by a categorical value.

    One facet is drawn per value of ``adata.obs[sample_col]``.

    Args:
        mod: Not used by this function; kept for interface compatibility.
        adata: Object whose ``.obs`` DataFrame has "imagecol", "imagerow"
            and ``sample_col`` columns.
        sample_col: Name of the ``adata.obs`` column used for faceting.
        color: Values assigned to the "color" column (one per row of
            ``adata.obs``); converted to an ordered categorical.
        n_columns: Number of facet columns.
        figure_size: Figure size passed to ``plotnine.theme``.
        point_size: Point size passed to ``geom_point``.
        text_size: Facet strip text size.

    Returns:
        The plotnine ggplot object.
    """
    # Work on an explicit copy: adding columns to a column-subset of
    # `adata.obs` triggers pandas' SettingWithCopy warning and must never
    # write back into the caller's table.
    for_plot = adata.obs[["imagecol", "imagerow", sample_col]].copy()
    for_plot["color"] = color

    # fix types
    for_plot["color"] = pd.Categorical(for_plot["color"], ordered=True)
    for_plot["sample"] = pd.Categorical(for_plot[sample_col], ordered=False)
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    # Row coordinate is negated so that increasing image rows plot downwards
    for_plot["imagerow"] = -pd.to_numeric(for_plot["imagerow"])

    ax = (
        plotnine.ggplot(
            for_plot, plotnine.aes(x="imagecol", y="imagerow", color="color"))
        + plotnine.geom_point(size=point_size)
        + plotnine.coord_fixed() + plotnine.theme_bw() + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        ) + plotnine.facet_wrap("~sample", ncol=n_columns) +
        plotnine.theme(figure_size=figure_size))

    return ax
示例#15
0
def mpl_theme(width=12, height=8):
    """Return a two-item list: the matplotlib theme plus size, strip and grid overrides.

    Args:
        width: Figure width.
        height: Figure height.
    """
    base = pn.theme_matplotlib()
    overrides = pn.theme(
        figure_size=(width, height),
        strip_background=pn.element_rect(color='w', fill='w'),
        panel_grid=pn.element_line(color='k', alpha=.1),
    )
    return [base, overrides]
def plot_score(df, plot_fn):
    """Save a box plot of "score" per "emotion_cat" to ``plot_fn``.

    Args:
        df: DataFrame with "emotion_cat" and "score" columns.
        plot_fn: Destination passed to the plot's ``save`` method.
    """
    legend_and_size = p9.theme(
        legend_position="top",
        legend_direction="horizontal",
        figure_size=(10, 5),
    )
    background = p9.theme(
        plot_background=p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1)
    )

    boxplot = (
        p9.ggplot(df, p9.aes(x="emotion_cat", y="score"))
        + p9.geom_boxplot()
        + p9.labs(x="Model", y="EMOTION FEEL Score")
        + p9.theme_538()
        + legend_and_size
        + background
    )
    boxplot.save(plot_fn)
示例#17
0
def theme_cognoma(fontsize_mult=1):
    """Return a ``theme_bw``-based theme with dark-grey lines and scaled fonts.

    plotnine is imported inside the function.

    Args:
        fontsize_mult: Multiplier applied to the base size (14), the axis
            text size (12) and the axis title size (13).
    """
    import plotnine as gg

    dark_grey = "#4d4d4d"
    axis_title_size = 13 * fontsize_mult

    base = gg.theme_bw(base_size=14 * fontsize_mult)
    overrides = gg.theme(
        line=gg.element_line(color=dark_grey),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark_grey),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark_grey),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark_grey),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark_grey),
        axis_title_x=gg.element_text(size=axis_title_size, color=dark_grey),
        axis_title_y=gg.element_text(size=axis_title_size, color=dark_grey),
    )
    return (base + overrides)
def plot_rank_full(df, plot_fn):
    """Save stacked bars of "ratio" per "emotion_cat", filled by rank, to ``plot_fn``.

    One facet is drawn per value of the "cluster_labels_6" column.

    Args:
        df: DataFrame with "emotion_cat", "ratio", "rank" and
            "cluster_labels_6" columns.
        plot_fn: Destination passed to the plot's ``save`` method.
    """
    mapping = p9.aes(x="emotion_cat", y="ratio", fill="factor(rank)")
    legend_and_size = p9.theme(
        legend_position="top",
        legend_direction="horizontal",
        figure_size=(10, 5),
    )
    background_and_ticks = p9.theme(
        plot_background=p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1),
        axis_text_x=p9.element_text(rotation=45, hjust=1),
    )

    bars = (
        p9.ggplot(df, mapping)
        + p9.geom_bar(stat="identity")
        + p9.facet_wrap("cluster_labels_6")
        + p9.labs(x="Model", y="Proportion (%)", fill="Rank")
        + p9.theme_538()
        + legend_and_size
        + background_and_ticks
    )
    bars.save(plot_fn)
示例#19
0
    def scatterplot(cls, df):
        """Save faceted mean/confidence-interval scatterplots as PNG files.

        Rows whose 'index' is 'Overall' or 'No ROI' are dropped, the rest are
        sorted by 'Mean' within each (config.table_cols, config.table_rows)
        group, and one figure per selected ordering is written to
        ``Figures/Scatterplots/<ordering>_scatterplot.png``.

        ``config.table_row_order`` selects the orderings: 'roi' keeps only
        'roi_ordered', 'statorder' keeps only 'stat_ordered', and any other
        value produces both.

        Args:
            df: DataFrame with 'index', 'Mean' and 'Conf_Int_95' columns plus
                the columns named by config.table_cols / config.table_rows
                (and 'MB' / 'SENSE' for the per-facet ordering).
        """
        Utils.check_and_make_dir("Figures/Scatterplots")
        df = df[(df['index'] != 'Overall') &
                (df['index'] != 'No ROI')]  # Remove No ROI and Overall rows

        df = df.groupby([config.table_cols, config.table_rows]).apply(
            lambda x: x.sort_values(['Mean']))  # Group by parameters and sort
        df = df.reset_index(drop=True)  # Reset index to remove grouping

        scatterplots = ['roi_ordered', 'stat_ordered']
        if config.table_row_order == 'roi':
            # The list holds 'stat_ordered'; removing 'stat' raised ValueError
            scatterplots.remove('stat_ordered')
        elif config.table_row_order == 'statorder':
            scatterplots.remove('roi_ordered')

        for plot_kind in scatterplots:
            if config.verbose:
                print(f"Saving {plot_kind} scatterplot!")

            if plot_kind == 'roi_ordered':
                roi_ord = pd.Categorical(df['index'],
                                         categories=df['index'].unique()
                                         )  # Order rows based on first facet
            else:
                # NOTE(review): grouping columns are hard-coded here while the
                # facets use config.table_rows / config.table_cols — confirm
                # they are meant to be the same columns.
                roi_ord = pd.Categorical(
                    df.groupby(['MB', 'SENSE'
                                ]).cumcount())  # Order each facet individually

            figure_table = (
                pltn.ggplot(df, pltn.aes(x="Mean", y=roi_ord)) +
                pltn.geom_point(na_rm=True, size=1) + pltn.geom_errorbarh(
                    pltn.aes(xmin="Mean-Conf_Int_95", xmax="Mean+Conf_Int_95"),
                    na_rm=True,
                    height=None) + pltn.xlim(0, None) +
                pltn.scale_y_discrete(labels=[]) +
                pltn.ylab(config.table_y_label) +
                pltn.xlab(config.table_x_label) +
                pltn.facet_grid('{rows}~{cols}'.format(rows=config.table_rows,
                                                       cols=config.table_cols),
                                drop=True,
                                labeller="label_both") +
                pltn.theme_538()  # Set theme
                + pltn.theme(
                    panel_grid_major_y=pltn.themes.element_line(alpha=0),
                    panel_grid_major_x=pltn.themes.element_line(alpha=1),
                    panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                    dpi=config.plot_dpi))

            figure_table.save(
                f"Figures/Scatterplots/{plot_kind}_scatterplot.png",
                height=config.plot_scale,
                width=config.plot_scale * 3,
                verbose=False,
                limitsize=False)
示例#20
0
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Box-and-jitter plot of a similarity metric per replicate group.

    Args:
        df: DataFrame with "group_replicate" and "similarity_metric" columns.
        batch: Batch identifier, shown in the title.
        plate: Plate identifier, shown in the title.
        facet_string: Facet specification for ``facet_wrap``; required when
            ``split_samples`` is true.
        split_samples: If true, facet the plot by ``facet_string``.
        output_file_base: If truthy, the plot is passed to ``save_figure``
            together with this base name.
        output_file_extensions: Extensions forwarded to ``save_figure``.
            ``None`` (the default) means ``[".png", ".pdf", ".svg"]``.
        dpi: Forwarded to ``save_figure``.
        height: Forwarded to ``save_figure``.
        width: Forwarded to ``save_figure``.
        return_plot: If true, return the ggplot object; otherwise return None.

    Raises:
        AssertionError: If ``split_samples`` is true but ``facet_string`` is
            empty or None.
    """
    # A list literal as the default would be shared between calls; build a
    # fresh one per call instead.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        if not facet_string:
            raise AssertionError("To split samples, specify a facet_string")
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi, height, width
        )
    if return_plot:
        return correlation_gg
示例#21
0
class THEME:
    """Namespace of shared colours, a plotnine theme and colour scales."""

    # Background colour used for both the panel and the whole plot in `mt`
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Palette used by the manual fill / color scales below
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#2a9d8f",
        "#797d62",
        "#3a6ea5",
    ]

    mt = theme(
        panel_background=element_rect(fill=bgcolor),
        plot_background=element_rect(fill=bgcolor),
        axis_text_x=element_text(color="black"),
        axis_text_y=element_text(color="black"),
        strip_margin_y=0.05,
        strip_margin_x=0.5,
    )

    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#aad576", "#ce4257")

    FILL = 1
    COLOR = 2

    LONG_FIGURE = (10, 20)
示例#22
0
    def __init__(self, *args, **kwargs):
        """Initialise via ``theme_matplotlib`` and overlay border and grid styling.

        All positional and keyword arguments are forwarded unchanged to
        ``p9.theme_matplotlib.__init__``.
        """
        p9.theme_matplotlib.__init__(self, *args, **kwargs)

        # gray used in themes.theme_matplotlib
        frame_gray = '#D9D9D9'
        line_width = 0.7

        overrides = p9.theme(
            panel_border=p9.element_rect(color=frame_gray, size=line_width),
            axis_line=p9.element_blank(),
            axis_ticks_length=0,
            axis_ticks=p9.element_blank(),
            panel_grid_major=p9.element_line(color=frame_gray,
                                             size=line_width),
            panel_grid_minor=p9.element_blank(),
            panel_ontop=True,  # plot panel on top of grid
        )
        self.add_theme(overrides, inplace=True)
示例#23
0
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Plot the density of a similarity metric with a dashed cutoff line.

    Args:
        df: DataFrame with "similarity_metric" and "group_replicate" columns.
        batch: Batch identifier, shown in the title.
        plate: Plate identifier, shown in the title.
        cutoff: x position of the red dashed vertical line.
        percent_strong: Fraction shown in the title as "Percent Replicating"
            after multiplying by 100 and rounding to 2 decimals.
        output_file_base: If truthy, the plot is passed to ``save_figure``
            together with this base name.
        output_file_extensions: Extensions forwarded to ``save_figure``.
            ``None`` (the default) means ``[".png", ".pdf", ".svg"]``.
        dpi: Forwarded to ``save_figure``.
        height: Forwarded to ``save_figure``.
        width: Forwarded to ``save_figure``.
        return_plot: If true, return the ggplot object; otherwise return None.
    """
    # A list literal as the default would be shared between calls; build a
    # fresh one per call instead.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
        + gg.ggtitle(
            f"{batch}; Plate: {plate}\n\nPercent Replicating: {np.round(percent_strong * 100, 2)}%"
        )
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=3.5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=4),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=4),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(
            density_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return density_gg
示例#24
0
def plot_restaurants_per_neighborhood(filepath, restaurant_data_file,
                                      pittsburgh_shapefile):
    """Save a map of restaurant counts per neighborhood as "restaurant_map.png".

    Args:
        filepath: Prefix concatenated directly with ``restaurant_data_file``.
        restaurant_data_file: CSV with "longitude" and "latitude" columns.
        pittsburgh_shapefile: File read with ``gpd.read_file``; its "hood",
            "hood_no" and "geometry" columns are used.
    """
    restaurants = pd.read_csv(filepath + restaurant_data_file)

    # Turn the longitude / latitude columns into point geometries
    points = gpd.points_from_xy(restaurants.longitude, restaurants.latitude)
    restaurant_locations = gpd.GeoDataFrame(
        restaurants, geometry=points).filter(items=["geometry"])

    # import Pittsburgh neighborhood shapefile
    neighborhoods = gpd.read_file(pittsburgh_shapefile)
    neighborhoods = neighborhoods.filter(items=["hood", "hood_no", "geometry"])

    # spatial join to figure out which neighborhood each restaurant is in
    located = gpd.sjoin(restaurant_locations,
                        neighborhoods,
                        how="inner",
                        op="intersects")

    # After count(), the "hood" column holds the number of joined rows per
    # neighborhood number, so it is relabelled as the restaurant count.
    per_hood = (located.groupby("hood_no").count().reset_index()
                .filter(items=["hood_no", "hood"])
                .rename(columns={"hood": "num_restaurants"}))

    # Left merge keeps every neighborhood polygon
    shapes_with_counts = gpd.GeoDataFrame(
        pd.merge(neighborhoods, per_hood, how="left"))

    blank_axes = p.theme(
        panel_background=p.element_rect(fill="white"),
        axis_text_x=p.element_blank(),
        axis_text_y=p.element_blank(),
        axis_ticks_major_x=p.element_blank(),
        axis_ticks_major_y=p.element_blank(),
    )
    restaurant_map = (
        p.ggplot(shapes_with_counts)
        + p.geom_map(p.aes(fill="num_restaurants"))
        + p.scale_colour_gradient(low="white", high="black")
        + blank_axes
        + p.scale_fill_gradient(low="#efefef", high="#073763",
                                name="# Restaurants")
    )

    restaurant_map.save("restaurant_map.png")
示例#25
0
def theme_tufte(base_size=11, base_family='serif', lines=True, ticks=True):
    """
    Minimal-ink theme after Chapter 6, 'Data-Ink Maximization and Graphical
    Design', of Edward Tufte's 'The Visual Display of Quantitative
    Information'.

    Built on ``theme_bw`` with the legend, panel and strip backgrounds and
    the panel grid blanked, a white plot background, and thin (0.5) axis
    lines and ticks.

    Parameters
    ----------
    base_size : int, optional
        Base font size, forwarded to ``theme_bw``. Default is 11.
    base_family : str, optional
        Base font family, forwarded to ``theme_bw``. Default is 'serif'.
    lines : bool, optional
        Keep the axis lines. Default is True.
    ticks : bool, optional
        Keep the axis ticks. Default is True.

    Returns
    -------
    Plotnine theme.

    """
    base = p9.theme_bw(base_size=base_size, base_family=base_family)
    overrides = p9.theme(
        legend_background=p9.element_blank(),
        legend_key=p9.element_blank(),
        panel_background=p9.element_blank(),
        strip_background=p9.element_blank(),
        plot_background=p9.element_rect(fill='white'),
        axis_line=p9.element_line(size=0.5),
        axis_ticks=p9.element_line(size=0.5),
        panel_grid=p9.element_blank(),
    )
    result = base + overrides

    # Blank out whichever axis decorations were switched off (ticks first,
    # then lines)
    for keep, themeable in ((ticks, 'axis_ticks'), (lines, 'axis_line')):
        if not keep:
            result = result + p9.theme(**{themeable: p9.element_blank()})

    return result
input_data_UMAPencoded_df


# In[12]:


# Scatter the two encoded columns ('1' and '2'), colored by dataset
fig = (
    ggplot(input_data_UMAPencoded_df, aes(x='1', y='2'))
    + geom_point(aes(color='dataset'), alpha=0.2)
    + labs(
        x='UMAP 1',
        y='UMAP 2',
        title='UMAP of normalized compendium',
    )
    + theme_bw()
    + theme(
        legend_title_align="center",
        plot_background=element_rect(fill='white'),
        legend_key=element_rect(fill='white', colour='white'),
        legend_title=element_text(family='sans-serif', size=15),
        legend_text=element_text(family='sans-serif', size=12),
        plot_title=element_text(family='sans-serif', size=15),
        axis_text=element_text(family='sans-serif', size=12),
        axis_title=element_text(family='sans-serif', size=15),
    )
    # Legend keys are drawn fully opaque even though the points are translucent
    + guides(colour=guide_legend(override_aes={'alpha': 1}))
    + scale_color_manual(['#ff6666', '#add8e6'])
)

print(fig)


# **Observations:**
# * There looks to be a good amount of variance in the compendium overall.
def generate_map(data,
                 region,
                 value_field,
                 iso_field='iso',
                 scale_params=None,
                 plot_na_dots=False,
                 tolerance=None,
                 plot_size=8,
                 out_region_color='#f0f0f0',
                 na_color='#aaaaaa',
                 line_color='#666666',
                 projection=None):
    """
    Build a choropleth map of *data* focused on one region.

    Country shapes are read from ``data/world-countries.shp`` (relative to
    this module), left-joined with *data* on the ISO code and drawn in three
    groups: in-region countries with a value, in-region countries without a
    value, and countries outside the region. Countries whose ``pol_area`` is
    below ``DOT_THRESHOLD`` times the region's EPSG:4326 bounding-box area
    are additionally marked with a dot at their ``lon`` / ``lat`` position.

    :param pandas.DataFrame data: Data to be plotted.
    :param str region: Region to center the map around. Countries outside
        the chosen region are filled with *out_region_color*. For ``'XWX'``
        no country is treated as being outside the region.
    :param str value_field: Column of *data* with the values to be plotted.
        An ``object`` dtype column is treated as discrete, anything else as
        continuous.
    :param str iso_field: Column of *data* with the ISO3 codes for each
        country.
    :param dict scale_params: Dictionary of parameters to be passed to the
        ggplot corresponding color scale (continuous or discrete).
    :param bool plot_na_dots: Whether to plot the dots for small countries
        if said country doesn't have data available.
    :param int tolerance: Coordinate tolerance for polygon simplification,
        a higher number will result in simpler polygons and faster
        rendering. Defaults to the entry of DEFAULT_TOLERANCES for the
        projection and region.
    :param int plot_size: Size of the plot, which determines the relative sizes
        of the elements within.
    :param str out_region_color: Hex color of the countries that are out of the
        specified region.
    :param str na_color: Hex color of the countries with no data available.
    :param str line_color: Color of the country borders.
    :param str projection: Kind of map projection to be used in the map.
        Defaults to ``'epsg4326'`` for Oceania (XOX), which is only available
        in EPSG:4326 to enable wrapping, and to ``'robinson'`` otherwise.
    :returns: a dictionary with the ggplot-like map under ``'plot'`` and the
        width / height ratio of the region bounds under ``'ratio'``
    :rtype: dict
    :raises ValueError: if *projection* is not a key of PROJECTION_DICT or
        *region* has no bounds defined for the projection.
    """
    if projection is None:
        projection = 'epsg4326' if region == 'XOX' else 'robinson'

    if projection not in PROJECTION_DICT:
        raise ValueError('Projection "{}" not valid'.format(projection))

    scale_params = {} if scale_params is None else scale_params

    region_bounds = REGION_BOUNDS[projection]
    if region not in region_bounds:
        raise ValueError(
            '"region" not available. Valid regions are: {}'.format(', '.join(
                region_bounds.keys())))

    if tolerance is None:
        tolerance = DEFAULT_TOLERANCES[projection][region]

    shapefile = os.path.join(os.path.dirname(__file__),
                             'data/world-countries.shp')
    countries = GeoDataFrame.from_file(shapefile)

    def centroid_of(row):
        # Centroid of a single country row.
        return row['geometry'].centroid

    # To plot Oceania we need the original EPSG:4326 to wrap around the 180º
    # longitude. In other cases transform to the desired projection.
    if region == 'XOX':
        countries.crs['lon_wrap'] = '180'  # Wrap around longitude 180º

        in_oceania = countries['continent'] == 'XOX'
        countries[in_oceania] = countries[in_oceania].to_crs(countries.crs)
        oceania_centroids = countries[in_oceania].apply(centroid_of, axis=1)
        countries.loc[in_oceania, 'lon'] = [pt.x for pt in oceania_centroids]
        countries.loc[in_oceania, 'lat'] = [pt.y for pt in oceania_centroids]
    elif projection != 'epsg4326':
        countries = countries.to_crs(PROJECTION_DICT[projection])
        all_centroids = countries.apply(centroid_of, axis=1)
        countries['lon'] = [pt.x for pt in all_centroids]
        countries['lat'] = [pt.y for pt in all_centroids]

    countries['geometry'] = countries['geometry'].simplify(tolerance)

    # Axis limits and aspect ratio come from the region's corner points in
    # the chosen projection.
    upper_left, lower_right = region_bounds[region]
    limits_x = [upper_left[0], lower_right[0]]
    limits_y = [lower_right[1], upper_left[1]]
    width = limits_x[1] - limits_x[0]
    height = limits_y[1] - limits_y[0]
    ratio = width / height

    plot_data = pd.merge(countries,
                         data,
                         how='left',
                         left_on='iso',
                         right_on=iso_field)

    # The dot threshold is always relative to the EPSG:4326 extent of the
    # region, whatever projection is used for drawing.
    lonlat_bounds = REGION_BOUNDS['epsg4326'][region]
    lonlat_width = lonlat_bounds[1][0] - lonlat_bounds[0][0]
    lonlat_height = lonlat_bounds[0][1] - lonlat_bounds[1][1]
    map_area = lonlat_width * lonlat_height
    plot_data['plot_dot'] = (plot_data['pol_area'] < DOT_THRESHOLD * map_area)

    value_missing = pd.isnull(plot_data[value_field])
    if not plot_na_dots:
        plot_data['plot_dot'] &= ~value_missing

    if region == 'XWX':
        in_region = ~value_missing
        in_region_missing = value_missing
        out_region = np.repeat(False, len(plot_data))
    else:
        on_continent = plot_data['continent'] == region
        in_region = ~value_missing & on_continent
        in_region_missing = value_missing & on_continent
        out_region = plot_data['continent'] != region

    # Object columns are assumed to hold discrete values, everything else
    # continuous ones.
    is_discrete = plot_data[value_field].dtype == 'object'
    if is_discrete:
        fill_scale = scale_fill_brewer(**scale_params, drop=False)
    else:
        fill_scale = scale_fill_gradient(**scale_params)

    plot_data_values = plot_data[in_region]
    plot_data_missing = plot_data[in_region_missing]
    plot_data_out_region = plot_data[out_region]

    dots_region = plot_data_values[plot_data_values['plot_dot']]
    dots_region_missing = plot_data_missing[plot_data_missing['plot_dot']]
    dots_out_region = plot_data_out_region[plot_data_out_region['plot_dot']]

    # Layers are added in drawing order: polygons first, then the dots.
    map_plot = ggplot()
    map_plot += geom_map(plot_data_values,
                         aes(fill=value_field),
                         color=line_color,
                         size=0.3)
    # Mapping color to 'plot_dot' is what feeds the manual color scale
    # labelled 'No data available' that is added further down.
    map_plot += geom_map(plot_data_missing,
                         aes(color='plot_dot'),
                         fill=na_color,
                         size=0.3)
    map_plot += geom_map(plot_data_out_region,
                         fill=out_region_color,
                         color=line_color,
                         size=0.3)
    map_plot += geom_point(dots_region,
                           aes(x='lon', y='lat', fill=value_field),
                           size=3,
                           stroke=.1,
                           color=line_color)
    map_plot += geom_point(dots_region_missing,
                           aes(x='lon', y='lat'),
                           fill=na_color,
                           size=3,
                           stroke=.1,
                           color=line_color)
    map_plot += geom_point(dots_out_region,
                           aes(x='lon', y='lat'),
                           fill=out_region_color,
                           size=3,
                           stroke=.1,
                           color=line_color)
    map_plot += scale_x_continuous(breaks=[], limits=limits_x)
    map_plot += scale_y_continuous(breaks=[], limits=limits_y)
    map_plot += theme(
        figure_size=(plot_size * ratio, plot_size),
        panel_background=element_rect(fill='white', color='black'),
        legend_background=element_rect(fill="white", color='black', size=.5),
        legend_box_just='left')
    map_plot += xlab('')
    map_plot += ylab('')

    # The fill scale is only attached when there is something to fill.
    if len(plot_data_values.index) > 0:
        map_plot += fill_scale

    map_plot += scale_color_manual(name=' ',
                                   values=[line_color],
                                   breaks=[False],
                                   labels=['No data available'])

    if is_discrete:
        map_plot += guides(fill=guide_legend(override_aes={'shape': None}))

    return {
        'plot': map_plot,
        'ratio': ratio,
    }
示例#28
0
def plot_factor_spatial(
        adata,
        fact,
        cluster_names,
        fact_ind=[0],
        trans="log",
        sample_name=None,
        samples_col="sample",
        obs_x="imagecol",
        obs_y="imagerow",
        n_columns=6,
        max_col=5000,
        col_breaks=[0.1, 100, 1000, 3000],
        figure_size=(24, 5.7),
        point_size=0.8,
        text_size=9,
):
    r"""Plot expression of factors / cell types in space.
    Convenient but not as powerful as scanpy plotting.

    One panel is drawn per selected factor; every spot is placed at
    (``obs_x``, ``-obs_y``) and colored by the factor weight.

    :param adata: anndata object with spatial data
    :param fact: pd.DataFrame with spatial expression of factors (W), e.g. mod.spot_factors_df.
        Rows are matched to ``adata.obs`` by position, not by index label.
    :param cluster_names: names of those factors to show on a plot, indexable by the entries of ``fact_ind``
        (a list, numpy array or pandas Index all work)
    :param fact_ind: positions of the factors (columns of ``fact``) to plot; must not be empty
    :param trans: transform colorscale? passed to plotnine.scale_color_cmap
    :param sample_name: if anndata object contains multiple samples specify which sample(s) to plot (no warning given if not)
    :param samples_col: if anndata object contains multiple which .obs columns specifies sample?
    :param obs_x: which .obs columns specifies x coordinate?
    :param obs_y: which .obs columns specifies y coordinate?
    :param n_columns: how many factors / clusters to plot in each row (plotnine.facet_wrap)
    :param max_col: colorscale maximum expression in fact
    :param col_breaks: colorscale breaks; ``max_col`` is appended as the last break
    :param figure_size: figures size works weirdly (only x axis has an effect, use 24 for 6-column plot, 12 for 3, 8 for 2 ...).
    :param point_size: point size of spots
    :param text_size: text size
    :return: the assembled plotnine plot
    :raises IndexError: if ``fact_ind`` is empty
    """

    # len() rather than truthiness so that numpy arrays are accepted too.
    if len(fact_ind) == 0:
        raise IndexError("fact_ind must contain at least one factor index")

    if sample_name is not None:
        sample_ind = np.isin(adata.obs[samples_col], sample_name)
    else:
        sample_ind = np.repeat(True, adata.shape[0])

    # Coordinates are shared by all factors; the y axis is negated before
    # plotting.
    coord_x = adata.obs[obs_x].values
    coord_y = -adata.obs[obs_y].values

    # One long-format table per factor, restricted to the requested sample.
    # Building the frames column by column keeps the numeric dtypes intact
    # (no detour through a string array), and concatenating once avoids
    # re-copying the accumulated table for every factor.
    per_factor = [
        pd.DataFrame(
            {
                "imagecol": coord_x,
                "imagerow": coord_y,
                "weights": fact.iloc[:, i].values,
                "cluster": cluster_names[i],
            },
            index=adata.obs.index,
        ).loc[sample_ind, :]
        for i in fact_ind
    ]
    for_plot = pd.concat(per_factor)

    # Still coerce to numeric so that object-typed inputs behave as before.
    for column in ("imagecol", "imagerow", "weights"):
        for_plot[column] = pd.to_numeric(for_plot[column])

    # Ordered categories keep the panels in the order given by fact_ind.
    # Element-wise lookup works for plain lists as well as arrays.
    for_plot["cluster"] = pd.Categorical(
        for_plot["cluster"],
        categories=[cluster_names[i] for i in fact_ind],
        ordered=True)

    ax = (plotnine.ggplot(
        for_plot, plotnine.aes("imagecol", "imagerow", color="weights")) +
          plotnine.geom_point(size=point_size) +
          plotnine.scale_color_cmap("magma",
                                    trans=trans,
                                    limits=[0.1, max_col],
                                    # list() so tuples / arrays are appended
                                    # to instead of added element-wise
                                    breaks=list(col_breaks) + [max_col]) +
          plotnine.coord_fixed() + plotnine.theme_bw() + plotnine.theme(
              panel_background=plotnine.element_rect(
                  fill="black", colour="black", size=0, linetype="solid"),
              panel_grid_major=plotnine.element_line(
                  size=0, linetype="solid", colour="black"),
              panel_grid_minor=plotnine.element_line(
                  size=0, linetype="solid", colour="black"),
              strip_text=plotnine.element_text(size=text_size),
          ) + plotnine.facet_wrap("~cluster", ncol=n_columns) +
          plotnine.ggtitle("nUMI from each cell type") +
          plotnine.theme(figure_size=figure_size))

    return ax
)

# Copy the "sample group" labels from the normalized data onto the
# UMAP-encoded table so they can be used for coloring.
normalized_all_data_UMAPencoded_df["sample group"] = (
    normalized_all_data["sample group"]
)

# Scatter plot of the two UMAP dimensions, colored by sample group.
fig = (
    pn.ggplot(normalized_all_data_UMAPencoded_df, pn.aes(x="1", y="2"))
    + pn.geom_point(pn.aes(color="sample group"), alpha=0.4)
    + pn.labs(
        x="UMAP 1",
        y="UMAP 2",
        title="Gene expression data in gene space",
    )
    + pn.theme_bw()
    + pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )
    + pn.scale_color_manual(["#bdbdbd", "red", "blue"])
    # The points are translucent (alpha=0.4); draw the legend keys opaque.
    + pn.guides(colour=pn.guide_legend(override_aes={"alpha": 1}))
    # Restrict the x axis to the 9-10 range.
    + pn.scales.xlim(9, 10)
)
print(fig)
# -

# Based on a UMAP of the normalized gene expression data, it looks like there isn't a clear separation between WT and mutant samples, though there are only 2 samples per group so this type of clustering observation is limited.
                 color ='darkgrey',
                size=0.5) \
    + geom_errorbar(all_svcca[all_svcca['Group'] == 'uncorrected'],
                  aes(x=lst_num_experiments, ymin='ymin', ymax='ymax'),
                   color='darkgrey') \
    + geom_line(threshold,
                aes(x=lst_num_experiments, y='score'),
                linetype='dashed',
                size=1,
                color="darkgrey",
                show_legend=False) \
    + labs(x = "Number of Partitions",
           y = "Similarity score (SVCCA)",
           title = "Similarity across varying numbers of partitions") \
    + theme(plot_title=element_text(weight='bold'),
            plot_background=element_rect(fill="white"),
            panel_background=element_rect(fill="white"),
            panel_grid_major_x=element_line(color="lightgrey"),
            panel_grid_major_y=element_line(color="lightgrey"),
            axis_line=element_line(color="grey"),
            legend_key=element_rect(fill='white', colour='white')
           ) \
    + scale_color_manual(['#b3e5fc']) \

# Show the figure and write it to disk.
print(g)
ggsave(plot=g, filename=svcca_uncorrected_file, dpi=300)

# In[9]:

# Plot - black
# Keep only the first half of the index labels of all_svcca.
lst_num_experiments = list(all_svcca.index[:len(all_svcca.index) // 2])
# Make sure the figure directory exists before anything is written to it.
os.makedirs(output_figuresdir, exist_ok=True)

output_file = (
    pathlib.Path(output_figuresdir)
    / "all_cellpainting_cellquality_across_sites.png"
)
# check_if_write decides, given `force`, whether the file should be written.
if check_if_write(output_file, force, throw_warning=True):
    cell_count_gg.save(output_file, dpi=300, width=10, height=7, verbose=False)

# Same graph as above, separated by well.
cell_count_gg_parsed = gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count"))
cell_count_gg_parsed += gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity")
cell_count_gg_parsed += gg.theme_bw()
cell_count_gg_parsed += gg.theme(
    axis_text_x=gg.element_text(rotation=90, size=5),
    strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
)
cell_count_gg_parsed += gg.xlab("Sites")
cell_count_gg_parsed += gg.ylab("Cell Count")
cell_count_gg_parsed += gg.scale_fill_manual(
    name="Cell Quality",
    labels=cell_category_order,
    values=cell_category_colors,
)
# One panel per well, each with its own x axis.
cell_count_gg_parsed += gg.facet_wrap("~well", drop=False, scales="free_x")

output_file = (
    pathlib.Path(output_figuresdir)
    / "all_cellpainting_cellquality_across_sites_by_well.png"
)
if check_if_write(output_file, force, throw_warning=True):
    cell_count_gg_parsed.save(output_file, dpi=300, width=10, height=7, verbose=False)
                 color ='darkgrey',
                size=0.5) \
    + geom_errorbar(all_svcca,
                  aes(x=lst_num_partitions, ymin='ymin', ymax='ymax'),
                   color='darkgrey') \
    + geom_line(threshold,
                aes(x=lst_num_partitions, y='score'),
                linetype='dashed',
                size=1,
                color="darkgrey",
                show_legend=False) \
    + labs(x = "Number of Partitions",
           y = "Similarity score (SVCCA)",
           title = "Similarity across varying numbers of partitions") \
    + theme(plot_title=element_text(weight='bold'),
            plot_background=element_rect(fill="white"),
            panel_background=element_rect(fill="white"),
            panel_grid_major_x=element_line(color="lightgrey"),
            panel_grid_major_y=element_line(color="lightgrey"),
            axis_line=element_line(color="grey"),
            legend_key=element_rect(fill='white', colour='white')
           ) \
    + scale_color_manual(['#1976d2', '#b3e5fc']) \

# Show the panel, then write it out twice: once as SVG and once as PNG.
print(panel_A)
ggsave(plot=panel_A, filename=svcca_file, device="svg", dpi=300)
# The PNG copy must be rendered with the PNG device; it previously reused
# device="svg", which put SVG content into the file named by svcca_png_file.
ggsave(plot=panel_A, filename=svcca_png_file, device="png", dpi=300)

# ## Uncorrected PCA panel

# In[9]: