def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Plot each pathway's real test statistic against its simulated-rank percentile.

    Rows whose percentile rank exceeds 0.9 are drawn in red and labelled
    with their index value. The figure is printed and then written to disk
    as an SVG.

    Arguments
    ---------
    summary_df: dataframe
        Must contain the column ``method_stats_dict[method_name] + " (Real)"``
        and the column ``"Rank (simulated)"``.
    method_name: str
        Key into the module-level ``method_stats_dict``; also used in the title.
    x_label: str
        Label for the x axis.
    output_figure_filename: str
        Path the SVG is saved to.
    """
    top_cutoff = 0.9

    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[
                method_stats_dict[method_name] + " (Real)"
            ].values,
            "Percentile rank": summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    # Evaluate the threshold once. Pairing the mask with the index positionally
    # (instead of a .loc lookup per label) also keeps working when an index
    # label occurs more than once.
    is_top = plot_df["Percentile rank"] > top_cutoff
    point_labels = [
        label if top else "" for label, top in zip(plot_df.index, is_top)
    ]

    fig = pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
    fig += pn.geom_point()
    fig += pn.geom_point(
        plot_df[is_top],
        pn.aes(x="Test statistic", y="Percentile rank"),
        color="red",
    )
    fig += pn.geom_text(
        pn.aes(label=point_labels),
        ha="left",
        va="top",
        size=5,
    )
    fig += pn.labs(
        x=x_label,
        y="Percentile of ranking",
        title=f"{method_name} pathway statistics vs ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )

    print(fig)

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
def plot_fitting(x, y, resonance_frequency, parameter):
    """
    Plots the phase response and the corresponding fit of the harmonic damped oscillator.

    The inputs are not modified: renamed copies of ``x`` and ``y`` are used to
    build the plotting frame, so the caller's series keep their names.

    Args:
        x (`pandas Series`):                X coordinates (frequency in kHz)
        y (`pandas Series`):                Y coordinates (phase in radians)
        resonance_frequency (`float`):      Resonance frequency passed to `fit_function`
        parameter (`float array`):          The three remaining arguments of `fit_function`
                                            (per the original docs: Q factor, offset,
                                            linear background)

    Returns:
        p (`ggplot object`):                Points for the raw data plus a red line for the fit
    """
    y_fit = fit_function(x, resonance_frequency, parameter[0], parameter[1], parameter[2])

    # rename() returns new objects, so the caller's x / y are left untouched.
    data = concat(
        [
            x.rename('Frequency (kHz)'),
            y.rename('Phase (rad)'),
            y_fit.rename('Phase fit'),
        ],
        axis=1,
    )
    col_names = list(data)

    # Plot data
    p = ggplot(aes(x=col_names[0], y=col_names[1]), data=data) + \
        geom_point() + \
        geom_line(aes(x=col_names[0], y=col_names[2]), color='red', size=0.5) + \
        theme_seaborn(style='ticks', context='talk', font_scale=0.75) + \
        theme(figure_size=(15, 7),
              strip_background=element_rect(fill='white'),
              axis_line_x=element_line(color='black'),
              axis_line_y=element_line(color='black'),
              legend_key=element_rect(fill='white', color='white'))
    return p
def plot_paired_ranking(
    method1_summary_df,
    method2_summary_df,
    method1_name,
    method2_name,
    output_figure_filename,
):
    """Scatter the simulated percentile of one method against another's.

    The two summary frames are merged on their indexes (overlapping column
    names get a ``_<method name>`` suffix), the
    ``"Percentile (simulated)"`` column of each is plotted on one axis, and
    the figure is saved as SVG before being printed.
    """
    suffix_1 = f"_{method1_name}"
    suffix_2 = f"_{method2_name}"

    # Merging on the index guarantees both methods' rows line up
    merged = method1_summary_df.merge(
        method2_summary_df,
        left_index=True,
        right_index=True,
        suffixes=[suffix_1, suffix_2],
    )

    ranking_1 = merged[f"Percentile (simulated)_{method1_name}"].values
    ranking_2 = merged[f"Percentile (simulated)_{method2_name}"].values
    plot_df = pd.DataFrame(
        data={"Method1 ranking": ranking_1, "Method2 ranking": ranking_2},
        index=merged.index,
    )

    text_small = pn.element_text(family="sans-serif", size=12)
    text_large = pn.element_text(family="sans-serif", size=15)

    fig = (
        pn.ggplot(plot_df, pn.aes(x="Method1 ranking", y="Method2 ranking"))
        + pn.geom_point()
        + pn.labs(
            x=f"{method1_name} pathway ranking",
            y=f"{method2_name} pathway ranking",
            title=f"{method1_name} vs {method2_name} pathway ranking",
        )
        + pn.theme_bw()
        + pn.theme(
            legend_title_align="center",
            plot_background=pn.element_rect(fill="white"),
            legend_key=pn.element_rect(fill="white", colour="white"),
            legend_title=text_large,
            legend_text=text_small,
            plot_title=text_large,
            axis_text=text_small,
            axis_title=text_large,
        )
    )

    # Write the SVG first, then echo the figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
    print(fig)
def __init__(self, base_size=11, base_family='DejaVu Sans'):
    """Initialise as ``theme_light`` and then layer the overrides below on top."""
    theme_light.__init__(self, base_size, base_family)

    light_gray = '#DDDDDD'
    dark_gray = '#838383'

    overrides = theme(
        axis_ticks=element_line(color=light_gray, size=0.5),
        panel_border=element_rect(fill='None', color=dark_gray, size=1),
        strip_background=element_rect(fill=light_gray, color=dark_gray, size=1),
        strip_text_x=element_text(color='black'),
        strip_text_y=element_text(color='black', angle=-90),
    )
    self.add_theme(overrides, inplace=True)
def plot_response_shift(x, y, resonance_frequency_without, parameter_without, xx, yy, resonance_frequency_with,
                        parameter):
    """
    Plots the phase response of pre start data without and with cell attached to cantilever with the
    respective function fit.

    The inputs are not modified: renamed copies of the four input series are
    used to build the plotting frames.

    Args:
        x (`pandas Series`):                   X coordinates w/o cell (frequency in kHz)
        y (`pandas Series`):                   Y coordinates w/o cell (phase in radians)
        resonance_frequency_without (`float`): Resonance frequency used for the fit w/o cell
        parameter_without (`float array`):     The three remaining `fit_function` arguments for the
                                               fit w/o cell (per the original docs: Q factor, offset,
                                               linear background)
        xx (`pandas Series`):                  X coordinates w/ cell (frequency in kHz)
        yy (`pandas Series`):                  Y coordinates w/ cell (phase in radians)
        resonance_frequency_with (`float`):    Resonance frequency used for the fit w/ cell
        parameter (`float array`):             The three remaining `fit_function` arguments for the
                                               fit w/ cell

    Returns:
        p (`ggplot object`):                   Raw data as points (fill legend) and fits as lines
                                               (color legend)
    """
    freq_col = 'Frequency with (kHz)'

    y_fit_without = fit_function(x, resonance_frequency_without, parameter_without[0], parameter_without[1],
                                 parameter_without[2])
    y_fit_with = fit_function(xx, resonance_frequency_with, parameter[0], parameter[1], parameter[2])

    # rename() returns new objects, so the caller's series keep their names.
    data = concat(
        [
            x.rename('Frequency without (kHz)'),
            y.rename('Raw phase w/o cell att.'),
            y_fit_without.rename('Phase fit w/o cell att.'),
            xx.rename(freq_col),
            yy.rename('Raw phase w cell att.'),
            y_fit_with.rename('Phase fit w cell att.'),
        ],
        axis=1,
    )

    # Long format keyed on the w/ cell frequency; the rows belonging to the
    # w/o cell series then get their own frequency axis written back in.
    df = melt(data, id_vars=[freq_col],
              value_vars=['Phase fit w cell att.', 'Phase fit w/o cell att.'])
    df.loc[df['variable'] == 'Phase fit w/o cell att.', freq_col] = x.values

    df2 = melt(data, id_vars=[freq_col],
               value_vars=['Raw phase w cell att.', 'Raw phase w/o cell att.'])
    df2.loc[df2['variable'] == 'Raw phase w/o cell att.', freq_col] = x.values

    # Plot data
    p = ggplot(data=df) + \
        geom_point(aes(x="Frequency with (kHz)", y='value', fill='variable'), data=df2, alpha=0.6) + \
        geom_line(aes(x="Frequency with (kHz)", y='value', color='variable')) + \
        xlab('Frequency (kHz)') + \
        ylab('Phase (rad)') + \
        labs(fill='Raw data', color='Function fits') + \
        theme_seaborn(style='ticks', context='talk', font_scale=0.75) + \
        theme(figure_size=(15, 7),
              strip_background=element_rect(fill='white'),
              axis_line_x=element_line(color='black'),
              axis_line_y=element_line(color='black'),
              legend_key=element_rect(fill='white', color='white'))
    return p
def theme_energinet() -> p9.themes.theme:
    """Build the Energinet plot theme.

    White panel and legend backgrounds, black axis lines, the font family
    from ``endktheme.style.font_family()``, and no plot background, legend
    key, panel grid or axis ticks.
    """
    blank = p9.element_blank
    white_rect = p9.element_rect(fill="white")
    font = p9.element_text(family=endktheme.style.font_family())

    return p9.theme(
        text=font,
        axis_line=p9.element_line(color="black"),
        plot_background=blank(),
        panel_background=white_rect,
        legend_background=p9.element_rect(fill="white"),
        legend_key=blank(),
        panel_grid=blank(),
        axis_ticks=blank(),
    )
def __init__(self, base_size=11, base_family='DejaVu Sans'):
    """Start from ``theme_light`` and apply this theme's overrides in place."""
    theme_light.__init__(self, base_size, base_family)

    tick_color = '#DDDDDD'
    border_color = '#838383'

    # Themeable overrides, keyed by the keyword theme() expects
    settings = {
        'axis_ticks': element_line(color=tick_color, size=0.5),
        'panel_border': element_rect(fill='None', color=border_color, size=1),
        'strip_background': element_rect(
            fill='#DDDDDD', color=border_color, size=1),
        'strip_text_x': element_text(color='black'),
        'strip_text_y': element_text(color='black', angle=-90),
        'legend_key': element_blank(),
    }
    self.add_theme(theme(**settings), inplace=True)
def __init__(self, base_size=11, base_family="DejaVu Sans"):
    """Set up ``theme_light`` with the given base font, then add the overrides."""
    theme_light.__init__(self, base_size, base_family)

    pale, border = "#DDDDDD", "#838383"
    black_text = element_text(color="black")
    rotated_black_text = element_text(color="black", angle=-90)

    extra = theme(
        axis_ticks=element_line(color=pale, size=0.5),
        panel_border=element_rect(fill="None", color=border, size=1),
        strip_background=element_rect(fill=pale, color=border, size=1),
        strip_text_x=black_text,
        strip_text_y=rotated_black_text,
        legend_key=element_blank(),
    )
    self.add_theme(extra, inplace=True)
def plot_bargraph(count_plot_df, plot_df):
    """
    Build the horizontal lemma-count bar chart, one facet per repository.

    Arguments:
        count_plot_df - dataframe with the columns `lemma`, `count` and `repository`
        plot_df - dataframe with the columns `odds_ratio` and `lemma`; its
                  lemmas, sorted by ascending odds ratio, fix the bar order
    """
    lemma_order = plot_df.sort_values("odds_ratio", ascending=True).lemma.tolist()
    counts = count_plot_df.astype({"count": int})

    graph = p9.ggplot(counts, p9.aes(x="lemma", y="count"))
    graph += p9.geom_col(position=p9.position_dodge(width=0.5), fill="#253494")
    graph += p9.coord_flip()
    graph += p9.facet_wrap("repository", scales='free_x')
    graph += p9.scale_x_discrete(limits=lemma_order)
    graph += p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
    graph += p9.labs(x=None)
    graph += p9.theme_seaborn(
        context='paper', style="ticks", font="Arial", font_scale=0.95)
    graph += p9.theme(
        # 640 x 480
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )
    return graph
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language for one CLWE.

    Reads ``data_file(table)`` as CSV, keeps the rows whose ``clwe`` column
    equals ``clwe``, and writes the figure to ``output_file(output)``.
    ``ylim`` is passed to ``coord_cartesian``.
    """
    refine_levels = ['Original', '+retrofit', '+synthetic']
    language_levels = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    df = pd.read_csv(data_file(table))
    df = df[df.clwe == clwe]
    # Fixed category orders control bar and axis ordering
    df = df.assign(
        refine=pd.Categorical(df['refine'], refine_levels),
        language=pd.Categorical(df['language'], language_levels),
    )

    overrides = p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3),
    )
    g = (
        p9.ggplot(df, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + overrides
    )
    g.save(filename=output_file(output))
def theme_cognoma(fontsize_mult=1):
    """Return ``theme_bw`` plus the cognoma overrides.

    ``fontsize_mult`` scales the base size (14), the axis text size (12)
    and the axis title size (13).
    """
    dark = "#4d4d4d"
    axis_title = {"size": 13 * fontsize_mult, "color": dark}

    base = gg.theme_bw(base_size=14 * fontsize_mult)
    overrides = gg.theme(
        line=gg.element_line(color=dark),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark),
        axis_title_x=gg.element_text(**axis_title),
        axis_title_y=gg.element_text(**axis_title),
    )
    return base + overrides
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Build a density plot of pairwise correlations, filled by replicate status.

    Arguments:
    df - dataframe with the columns `pairwise_correlation` and `replicate_info`
    batch, plate - used only to build the plot title ("<batch>: <plate>")
    output_file_base - if truthy, the plot is handed to `save_figure` with this
        base name; otherwise nothing is saved
    output_file_extensions - extensions passed to `save_figure`
        [default: [".png", ".pdf", ".svg"]]
    dpi, height, width - passed through to `save_figure`

    Returns the plotnine ggplot object.
    """
    # A fresh list per call: a list literal as the default would be one
    # shared object that any callee could mutate for all later calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(
            density_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    return density_gg
def __init__(self):
    """Initialise ``theme_minimal`` with Open Sans, then add axis-title and border tweaks."""
    pn.theme_minimal.__init__(self, base_family='Open Sans')

    tweaks = pn.theme(
        axis_title=pn.element_text(size=10),
        # extra right margin on the y-axis title
        axis_title_y=pn.element_text(margin={'r': 12}),
        panel_border=pn.element_rect(color='gainsboro', size=1, fill=None),
    )
    self.add_theme(tweaks, inplace=True)
def plot_categ_spatial(mod, adata, sample_col, color, n_columns=2,
                       figure_size=(24, 5.7), point_size=0.8, text_size=9):
    r"""Plot a categorical value per spot in spatial coordinates, one facet per sample.

    :param mod: not used by this function; kept so existing callers keep working
    :param adata: object whose ``.obs`` dataframe has the columns ``imagecol``,
        ``imagerow`` and ``sample_col``
    :param sample_col: name of the ``.obs`` column used to facet the plot
    :param color: values assigned to a new ``color`` column and converted to an
        ordered categorical
    :param n_columns: number of facet columns (``facet_wrap(ncol=...)``)
    :param figure_size: passed to ``plotnine.theme(figure_size=...)``
    :param point_size: point size of spots
    :param text_size: facet strip text size
    :return: the plotnine ggplot object
    """
    # Take an explicit copy: the original assigned new columns onto a slice of
    # adata.obs, which triggers pandas' SettingWithCopy warning and leaves it
    # ambiguous whether adata.obs itself is being written to.
    for_plot = adata.obs[["imagecol", "imagerow", sample_col]].copy()
    for_plot["color"] = color

    # fix types
    for_plot["color"] = pd.Categorical(for_plot["color"], ordered=True)
    for_plot["sample"] = pd.Categorical(for_plot[sample_col], ordered=False)
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    # the row coordinate is negated before plotting on the y axis
    for_plot["imagerow"] = -pd.to_numeric(for_plot["imagerow"])

    ax = (
        plotnine.ggplot(
            for_plot, plotnine.aes(x="imagecol", y="imagerow", color="color"))
        + plotnine.geom_point(size=point_size)
        + plotnine.coord_fixed()
        + plotnine.theme_bw()
        + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        )
        + plotnine.facet_wrap("~sample", ncol=n_columns)
        + plotnine.theme(figure_size=figure_size)
    )

    return ax
def mpl_theme(width=12, height=8):
    """Return a two-item list: the matplotlib base theme and its overrides.

    The overrides set the figure size to ``(width, height)``, a white strip
    background, and a faint black panel grid.
    """
    base = pn.theme_matplotlib()
    overrides = pn.theme(
        figure_size=(width, height),
        strip_background=pn.element_rect(color='w', fill='w'),
        panel_grid=pn.element_line(color='k', alpha=.1),
    )
    return [base, overrides]
def plot_score(df, plot_fn):
    """Save a box plot of ``score`` per ``emotion_cat`` to ``plot_fn``."""
    layout = p9.theme(
        legend_position="top",
        legend_direction="horizontal",
        figure_size=(10, 5),
    )
    background = p9.theme(
        plot_background=p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1)
    )

    f = p9.ggplot(df, p9.aes(x="emotion_cat", y="score"))
    f += p9.geom_boxplot()
    f += p9.labs(x="Model", y="EMOTION FEEL Score")
    f += p9.theme_538()
    f += layout
    f += background
    f.save(plot_fn)
def theme_cognoma(fontsize_mult=1):
    """Return ``theme_bw`` combined with the cognoma overrides.

    ``fontsize_mult`` multiplies the base size (14), the axis text size (12)
    and the axis title size (13). plotnine is imported locally.
    """
    import plotnine as gg

    stroke = "#4d4d4d"

    def scaled_text(points):
        # dark text element at `points` scaled by fontsize_mult
        return gg.element_text(size=points * fontsize_mult, color=stroke)

    overrides = gg.theme(
        line=gg.element_line(color=stroke),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=stroke),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=stroke),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=stroke),
        axis_text=scaled_text(12),
        axis_title_x=scaled_text(13),
        axis_title_y=scaled_text(13),
    )
    return gg.theme_bw(base_size=14 * fontsize_mult) + overrides
def plot_rank_full(df, plot_fn):
    """Save a stacked bar chart of ``ratio`` per ``emotion_cat``, filled by rank.

    The plot is faceted on ``cluster_labels_6`` and written to ``plot_fn``.
    """
    mapping = p9.aes(x="emotion_cat", y="ratio", fill="factor(rank)")
    layout = p9.theme(
        legend_position="top",
        legend_direction="horizontal",
        figure_size=(10, 5),
    )
    cosmetics = p9.theme(
        plot_background=p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1),
        axis_text_x=p9.element_text(rotation=45, hjust=1),
    )

    f = p9.ggplot(df, mapping)
    f += p9.geom_bar(stat="identity")
    f += p9.facet_wrap("cluster_labels_6")
    f += p9.labs(x="Model", y="Proportion (%)", fill="Rank")
    f += p9.theme_538()
    f += layout
    f += cosmetics
    f.save(plot_fn)
def scatterplot(cls, df):
    """Save faceted mean / 95% confidence-interval scatterplots under Figures/Scatterplots.

    Rows whose ``index`` column is 'Overall' or 'No ROI' are dropped and the
    rest are sorted by ``Mean`` within each (config.table_cols,
    config.table_rows) group. Up to two figures are written:

    * ``roi_ordered``: the y order is taken from the order of first appearance
      of the ``index`` values;
    * ``stat_ordered``: the y order is each row's running position within its
      ('MB', 'SENSE') group.

    ``config.table_row_order`` selects which are produced: 'roi' skips
    ``stat_ordered``, 'statorder' skips ``roi_ordered``, any other value
    produces both.
    """
    Utils.check_and_make_dir("Figures/Scatterplots")

    df = df[(df['index'] != 'Overall') & (df['index'] != 'No ROI')]  # Remove No ROI and Overall rows

    df = df.groupby([config.table_cols, config.table_rows]).apply(
        lambda x: x.sort_values(['Mean']))  # Group by parameters and sort
    df = df.reset_index(drop=True)  # Reset index to remove grouping

    scatterplots = ['roi_ordered', 'stat_ordered']
    if config.table_row_order == 'roi':
        # The list entry is 'stat_ordered'; removing 'stat' raised ValueError.
        scatterplots.remove('stat_ordered')
    elif config.table_row_order == 'statorder':
        scatterplots.remove('roi_ordered')

    for plot_name in scatterplots:
        if config.verbose:
            print(f"Saving {plot_name} scatterplot!")

        if plot_name == 'roi_ordered':
            roi_ord = pd.Categorical(df['index'],
                                     categories=df['index'].unique())  # Order rows based on first facet
        else:
            # NOTE(review): groups on the literal 'MB'/'SENSE' columns rather than
            # config.table_cols/config.table_rows — confirm this is intended.
            roi_ord = pd.Categorical(
                df.groupby(['MB', 'SENSE']).cumcount())  # Order each facet individually

        figure_table = (
            pltn.ggplot(df, pltn.aes(x="Mean", y=roi_ord))
            + pltn.geom_point(na_rm=True, size=1)
            + pltn.geom_errorbarh(
                pltn.aes(xmin="Mean-Conf_Int_95", xmax="Mean+Conf_Int_95"),
                na_rm=True, height=None)
            + pltn.xlim(0, None)
            + pltn.scale_y_discrete(labels=[])
            + pltn.ylab(config.table_y_label)
            + pltn.xlab(config.table_x_label)
            + pltn.facet_grid('{rows}~{cols}'.format(rows=config.table_rows,
                                                     cols=config.table_cols),
                              drop=True, labeller="label_both")
            + pltn.theme_538()  # Set theme
            + pltn.theme(
                panel_grid_major_y=pltn.themes.element_line(alpha=0),
                panel_grid_major_x=pltn.themes.element_line(alpha=1),
                panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                dpi=config.plot_dpi)
        )

        figure_table.save(
            f"Figures/Scatterplots/{plot_name}_scatterplot.png",
            height=config.plot_scale,
            width=config.plot_scale * 3,
            verbose=False,
            limitsize=False)
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Build a box plot (with jittered points) of similarity by replicate group.

    Arguments:
    df - dataframe with the columns `group_replicate` and `similarity_metric`
    batch, plate - used only to build the plot title ("<batch>: <plate>")
    facet_string - facet specification for `facet_wrap`; required when
        `split_samples` is True
    split_samples - if True, facet the plot by `facet_string`
    output_file_base - if truthy, the plot is handed to `save_figure` with this
        base name; otherwise nothing is saved
    output_file_extensions - extensions passed to `save_figure`
        [default: [".png", ".pdf", ".svg"]]
    dpi, height, width - passed through to `save_figure`
    return_plot - if True the ggplot object is returned, otherwise None

    Raises AssertionError if `split_samples` is set without a `facet_string`.
    """
    # A fresh list per call: a list literal as the default would be one
    # shared object that any callee could mutate for all later calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        assert facet_string, "To split samples, specify a facet_string"
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return correlation_gg
class THEME:
    # Shared plot styling constants; everything is a class attribute.

    # Background color used for both the panel and the whole plot
    bgcolor = "#293241"

    # Loader appearance settings
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Palette used by the manual fill and color scales below
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#2a9d8f",
        "#797d62",
        "#3a6ea5",
    ]

    # Theme: bgcolor backgrounds, black axis text, explicit strip margins
    mt = theme(
        panel_background=element_rect(fill=bgcolor),
        plot_background=element_rect(fill=bgcolor),
        axis_text_x=element_text(color="black"),
        axis_text_y=element_text(color="black"),
        strip_margin_y=0.05,
        strip_margin_x=0.5,
    )

    # Ready-made scales
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#aad576", "#ce4257")

    # Integer selectors (presumably "fill" vs "color" aesthetics — confirm against callers)
    FILL = 1
    COLOR = 2

    # Figure size tuple for long figures
    LONG_FIGURE = (10, 20)
def __init__(self, *args, **kwargs):
    """See main class docstring."""
    p9.theme_matplotlib.__init__(self, *args, **kwargs)

    gray = '#D9D9D9'  # gray used in themes.theme_matplotlib
    line_width = 0.7
    hidden = p9.element_blank

    overrides = p9.theme(
        panel_border=p9.element_rect(color=gray, size=line_width),
        axis_line=hidden(),
        axis_ticks_length=0,
        axis_ticks=hidden(),
        panel_grid_major=p9.element_line(color=gray, size=line_width),
        panel_grid_minor=hidden(),
        panel_ontop=True,  # plot panel on top of grid
    )
    self.add_theme(overrides, inplace=True)
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Build a density plot of the similarity metric, filled by replicate group.

    Arguments:
    df - dataframe with the columns `similarity_metric` and `group_replicate`
    batch, plate - used only to build the plot title
    cutoff - x position of the dashed red vertical line
    percent_strong - fraction shown in the title as a percentage
        (multiplied by 100 and rounded to 2 decimals)
    output_file_base - if truthy, the plot is handed to `save_figure` with this
        base name; otherwise nothing is saved
    output_file_extensions - extensions passed to `save_figure`
        [default: [".png", ".pdf", ".svg"]]
    dpi, height, width - passed through to `save_figure`
    return_plot - if True the ggplot object is returned, otherwise None
    """
    # A fresh list per call: a list literal as the default would be one
    # shared object that any callee could mutate for all later calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
        + gg.ggtitle(
            f"{batch}; Plate: {plate}\n\nPercent Replicating: {np.round(percent_strong * 100, 2)}%"
        )
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=3.5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=4),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=4),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(
            density_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return density_gg
def plot_restaurants_per_neighborhood(filepath, restaurant_data_file,
                                      pittsburgh_shapefile):
    """Save a neighborhood map shaded by restaurant count to restaurant_map.png.

    The restaurant CSV is read from ``filepath + restaurant_data_file`` (plain
    string concatenation) and must have ``longitude`` and ``latitude`` columns.
    The shapefile must provide ``hood``, ``hood_no`` and ``geometry``.
    """
    mexican_restaurants = pd.read_csv(filepath + restaurant_data_file)

    # Turn lon/lat into point geometries and keep only the geometry column
    points = gpd.points_from_xy(mexican_restaurants.longitude,
                                mexican_restaurants.latitude)
    restaurant_locations = gpd.GeoDataFrame(
        mexican_restaurants, geometry=points).filter(items=["geometry"])

    # import Pittsburgh neighborhood shapefile
    shapes = gpd.read_file(pittsburgh_shapefile)
    neighborhood_polygons = shapes.filter(items=["hood", "hood_no", "geometry"])

    # spatial join to figure out which neighborhood each restaurant is in
    # NOTE(review): newer geopandas releases name this keyword `predicate`
    # instead of `op` — confirm against the installed version.
    restaurants_in_polys = gpd.sjoin(restaurant_locations,
                                     neighborhood_polygons,
                                     how="inner",
                                     op="intersects")

    # After count(), the "hood" column holds the number of joined rows per hood_no
    per_hood = restaurants_in_polys.groupby("hood_no").count().reset_index()
    restaurants_in_hoods = per_hood.filter(items=["hood_no", "hood"]).rename(
        columns={"hood": "num_restaurants"})

    # Left merge keeps every neighborhood, including those without restaurants
    restaurants_per_shape = gpd.GeoDataFrame(
        pd.merge(neighborhood_polygons, restaurants_in_hoods, how="left"))

    no_axes = p.theme(
        panel_background=p.element_rect(fill="white"),
        axis_text_x=p.element_blank(),
        axis_text_y=p.element_blank(),
        axis_ticks_major_x=p.element_blank(),
        axis_ticks_major_y=p.element_blank(),
    )
    restaurant_map = p.ggplot(restaurants_per_shape)
    restaurant_map += p.geom_map(p.aes(fill="num_restaurants"))
    restaurant_map += p.scale_colour_gradient(low="white", high="black")
    restaurant_map += no_axes
    restaurant_map += p.scale_fill_gradient(
        low="#efefef", high="#073763", name="# Restaurants")

    restaurant_map.save("restaurant_map.png")
def theme_tufte(base_size=11, base_family='serif', lines=True, ticks=True):
    """
    Theme inspired by Chapter 6 'Data-Ink Maximization and Graphical
    Design' of Edward Tufte's 'The Visual Display of Quantitative
    Information'.

    Built on ``theme_bw`` with the legend, panel and strip backgrounds and the
    panel grid blanked out, and a white plot background.

    Parameters
    ----------
    base_size : int, optional
        Base font size handed to ``theme_bw``. Default is 11.
    base_family : str, optional
        Base font family handed to ``theme_bw``. Default is 'serif'.
    lines : bool, optional
        Draw axis spines. Default is True.
    ticks : bool, optional
        Draw axis ticks. Default is True.

    Returns
    -------
    Plotnine theme.
    """
    blank = p9.element_blank
    thin_line = 0.5

    base = p9.theme_bw(base_size=base_size, base_family=base_family)
    stripped = p9.theme(
        legend_background=blank(),
        legend_key=blank(),
        panel_background=blank(),
        strip_background=blank(),
        plot_background=p9.element_rect(fill='white'),
        axis_line=p9.element_line(size=thin_line),
        axis_ticks=p9.element_line(size=thin_line),
        panel_grid=blank(),
    )
    result = base + stripped

    # Optional removals are layered on afterwards, ticks first
    if not ticks:
        result = result + p9.theme(axis_ticks=blank())
    if not lines:
        result = result + p9.theme(axis_line=blank())
    return result
input_data_UMAPencoded_df


# In[12]:


# Plot the two UMAP coordinates (columns '1' and '2'), colored by dataset
small_text = element_text(family='sans-serif', size=12)
large_text = element_text(family='sans-serif', size=15)

fig = (
    ggplot(input_data_UMAPencoded_df, aes(x='1', y='2'))
    + geom_point(aes(color='dataset'), alpha=0.2)
    + labs(x='UMAP 1',
           y='UMAP 2',
           title='UMAP of normalized compendium')
    + theme_bw()
    + theme(
        legend_title_align="center",
        plot_background=element_rect(fill='white'),
        legend_key=element_rect(fill='white', colour='white'),
        legend_title=large_text,
        legend_text=small_text,
        plot_title=large_text,
        axis_text=small_text,
        axis_title=large_text,
    )
    # Legend keys are drawn fully opaque even though the points are translucent
    + guides(colour=guide_legend(override_aes={'alpha': 1}))
    + scale_color_manual(['#ff6666', '#add8e6'])
)

print(fig)


# **Observations:**
# * There looks to be a good amount of variance in the compendium overall.
def generate_map(data,
                 region,
                 value_field,
                 iso_field='iso',
                 scale_params=None,
                 plot_na_dots=False,
                 tolerance=None,
                 plot_size=8,
                 out_region_color='#f0f0f0',
                 na_color='#aaaaaa',
                 line_color='#666666',
                 projection=None):
    """
    Build a choropleth map of *data* for the requested region.

    :param pandas.DataFrame data: Data to be plotted.
    :param str region: Region to center the map around; must be a key of
        REGION_BOUNDS for the chosen projection. Countries whose continent
        differs from the region are drawn in *out_region_color* (no country
        is treated as out-of-region when the region is 'XWX').
    :param str value_field: Column of *data* with the values to be plotted.
        An ``object`` dtype is treated as discrete, anything else as
        continuous.
    :param str iso_field: Column of *data* that is matched against the
        ``iso`` column of the countries shapefile.
    :param dict scale_params: Keyword arguments for the fill scale
        (``scale_fill_brewer`` or ``scale_fill_gradient``).
    :param bool plot_na_dots: Whether to also plot the dots for small
        countries that have no value available.
    :param int tolerance: Tolerance passed to the geometry ``simplify``
        call. Defaults to DEFAULT_TOLERANCES[projection][region].
    :param int plot_size: Height of the figure; the width is *plot_size*
        times the aspect ratio of the region bounds.
    :param str out_region_color: Fill color of the countries outside the
        specified region.
    :param str na_color: Fill color of the countries with no data available.
    :param str line_color: Color of the country borders.
    :param str projection: Key of PROJECTION_DICT. Defaults to 'epsg4326'
        for region 'XOX' and to 'robinson' otherwise.
    :raises ValueError: if the projection or the region is not available.
    :returns: dict with the plot under ``'plot'`` and the width/height ratio
        of the region bounds under ``'ratio'``
    :rtype: dict
    """
    if projection is None:
        projection = 'epsg4326' if region == 'XOX' else 'robinson'
    if projection not in PROJECTION_DICT:
        raise ValueError('Projection "{}" not valid'.format(projection))

    if scale_params is None:
        scale_params = {}

    projection_bounds = REGION_BOUNDS[projection]
    if region not in projection_bounds:
        valid_regions = ', '.join(projection_bounds.keys())
        raise ValueError(
            '"region" not available. Valid regions are: {}'.format(valid_regions))

    if tolerance is None:
        tolerance = DEFAULT_TOLERANCES[projection][region]

    shapefile = os.path.join(os.path.dirname(__file__),
                             'data/world-countries.shp')
    world = GeoDataFrame.from_file(shapefile)

    # To plot Oceania we need the original EPSG:4326 to wrap around the 180º
    # longitude. In other cases transform to the desired projection.
    if region == 'XOX':
        world.crs['lon_wrap'] = '180'  # Wrap around longitude 180º
        oceania = world['continent'] == 'XOX'
        world[oceania] = world[oceania].to_crs(world.crs)
        centroid_points = world[oceania].apply(
            lambda row: row['geometry'].centroid, axis=1)
        world.loc[oceania, 'lon'] = [pt.x for pt in centroid_points]
        world.loc[oceania, 'lat'] = [pt.y for pt in centroid_points]
    else:
        if projection != 'epsg4326':
            world = world.to_crs(PROJECTION_DICT[projection])
        centroid_points = world.apply(
            lambda row: row['geometry'].centroid, axis=1)
        world['lon'] = [pt.x for pt in centroid_points]
        world['lat'] = [pt.y for pt in centroid_points]

    world['geometry'] = world['geometry'].simplify(tolerance)

    # Axis limits and aspect ratio come from the bounds in the chosen projection
    upper_left, lower_right = projection_bounds[region]
    limits_x = [upper_left[0], lower_right[0]]
    limits_y = [lower_right[1], upper_left[1]]
    ratio = (limits_x[1] - limits_x[0]) / (limits_y[1] - limits_y[0])

    merged = pd.merge(world,
                      data,
                      how='left',
                      left_on='iso',
                      right_on=iso_field)

    # Countries smaller than a fraction of the region's EPSG:4326 area get a dot
    geo_upper_left, geo_lower_right = REGION_BOUNDS['epsg4326'][region]
    map_area = ((geo_lower_right[0] - geo_upper_left[0]) *
                (geo_upper_left[1] - geo_lower_right[1]))
    merged['plot_dot'] = merged['pol_area'] < DOT_THRESHOLD * map_area

    value_missing = pd.isnull(merged[value_field])
    if not plot_na_dots:
        merged['plot_dot'] &= ~value_missing

    if region == 'XWX':
        in_region = ~value_missing
        in_region_missing = value_missing
        out_region = np.repeat(False, len(merged))
    else:
        same_continent = merged['continent'] == region
        in_region = (~value_missing) & same_continent
        in_region_missing = value_missing & same_continent
        out_region = merged['continent'] != region

    is_discrete = merged[value_field].dtype == 'object'
    if is_discrete:
        # Assume discrete values
        fill_scale = scale_fill_brewer(**scale_params, drop=False)
    else:
        # Assume continuous values
        fill_scale = scale_fill_gradient(**scale_params)

    with_values = merged[in_region]
    without_values = merged[in_region_missing]
    outside = merged[out_region]

    dots_region = with_values[with_values['plot_dot']]
    dots_region_missing = without_values[without_values['plot_dot']]
    dots_out_region = outside[outside['plot_dot']]

    dot_style = dict(size=3, stroke=.1, color=line_color)
    dot_position = aes(x='lon', y='lat')

    map_plot = (
        ggplot()
        + geom_map(with_values, aes(fill=value_field), color=line_color, size=0.3)
        + geom_map(without_values, aes(color='plot_dot'), fill=na_color, size=0.3)
        + geom_map(outside, fill=out_region_color, color=line_color, size=0.3)
        + geom_point(dots_region, aes(x='lon', y='lat', fill=value_field),
                     **dot_style)
        + geom_point(dots_region_missing, dot_position, fill=na_color,
                     **dot_style)
        + geom_point(dots_out_region, dot_position, fill=out_region_color,
                     **dot_style)
        + scale_x_continuous(breaks=[], limits=limits_x)
        + scale_y_continuous(breaks=[], limits=limits_y)
        + theme(
            figure_size=(plot_size * ratio, plot_size),
            panel_background=element_rect(fill='white', color='black'),
            legend_background=element_rect(fill="white", color='black', size=.5),
            legend_box_just='left')
        + xlab('')
        + ylab('')
    )

    # The fill scale is only attached when at least one country has a value
    if len(with_values.index) > 0:
        map_plot += fill_scale

    map_plot += scale_color_manual(name=' ',
                                   values=[line_color],
                                   breaks=[False],
                                   labels=['No data available'])

    if is_discrete:
        map_plot += guides(fill=guide_legend(override_aes={'shape': None}))

    return {
        'plot': map_plot,
        'ratio': ratio,
    }
def plot_factor_spatial(
    adata,
    fact,
    cluster_names,
    fact_ind=[0],
    trans="log",
    sample_name=None,
    samples_col="sample",
    obs_x="imagecol",
    obs_y="imagerow",
    n_columns=6,
    max_col=5000,
    col_breaks=[0.1, 100, 1000, 3000],
    figure_size=(24, 5.7),
    point_size=0.8,
    text_size=9,
):
    r"""Plot expression of factors / cell types in space.

    Convenient but not as powerful as scanpy plotting. One facet is drawn per
    selected factor; every spot is coloured by that factor's weight.

    :param adata: anndata object with spatial data; spot coordinates and sample labels are read from ``adata.obs``
    :param fact: pd.DataFrame with spatial expression of factors (W), e.g. mod.spot_factors_df.
        Rows are matched to ``adata.obs`` by position, not by index label.
    :param cluster_names: names of the factors, indexable by the integers in ``fact_ind``
        (a list, numpy array or pandas Index all work)
    :param fact_ind: index of factors to plot (positions of columns in ``fact``); must not be empty
    :param trans: transform colorscale? passed to plotnine.scale_color_cmap
    :param sample_name: if anndata object contains multiple samples specify which sample
        (or list of samples) to plot (no warning given if not)
    :param samples_col: if anndata object contains multiple which .obs columns specifies sample?
    :param obs_x: which .obs columns specifies x coordinate?
    :param obs_y: which .obs columns specifies y coordinate? (plotted negated)
    :param n_columns: how many factors / clusters to plot in each row (plotnine.facet_wrap)
    :param max_col: colorscale maximum expression in fact (the lower limit is fixed at 0.1);
        also appended to ``col_breaks`` as the last break
    :param col_breaks: colorscale breaks
    :param figure_size: figures size works weirdly (only x axis has an effect,
        use 24 for 6-column plot, 12 for 3, 8 for 2 ...).
    :param point_size: point size of spots
    :param text_size: text size
    :return: plotnine ggplot object
    :raises IndexError: if ``fact_ind`` is empty
    """
    # NOTE: the list defaults in the signature are never mutated below.
    fact_ind = list(fact_ind)
    if not fact_ind:
        raise IndexError("fact_ind must contain at least one factor index")

    n_obs = adata.obs.shape[0]

    # Boolean mask of the spots that belong to the requested sample(s).
    if sample_name is not None:
        sample_ind = np.isin(adata.obs[samples_col], sample_name)
    else:
        sample_ind = np.repeat(True, n_obs)

    x_coord = adata.obs[obs_x].values
    # NOTE(review): y is negated, presumably so the plot has the same
    # orientation as the tissue image (row 0 at the top) - confirm.
    y_coord = -adata.obs[obs_y].values

    def _factor_frame(i):
        """Return the long-format rows (selected sample only) for factor ``i``."""
        frame = pd.DataFrame(
            {
                "imagecol": x_coord,
                "imagerow": y_coord,
                "weights": fact.iloc[:, i].values,
                "cluster": cluster_names[i],
            },
            index=adata.obs.index,
        )
        return frame.loc[sample_ind, :]

    # Stack one frame per factor; the "cluster" column drives the facets.
    for_plot = pd.concat([_factor_frame(i) for i in fact_ind])

    # Columns are already typed; to_numeric only matters for object-dtype input.
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    for_plot["imagerow"] = pd.to_numeric(for_plot["imagerow"])
    for_plot["weights"] = pd.to_numeric(for_plot["weights"])

    # Per-index lookup (rather than cluster_names[fact_ind]) also works when
    # cluster_names is a plain list, and fixes the facet order to fact_ind.
    for_plot["cluster"] = pd.Categorical(
        for_plot["cluster"],
        categories=[cluster_names[i] for i in fact_ind],
        ordered=True,
    )

    ax = (
        plotnine.ggplot(
            for_plot, plotnine.aes("imagecol", "imagerow", color="weights"))
        + plotnine.geom_point(size=point_size)
        + plotnine.scale_color_cmap(
            "magma",
            trans=trans,
            limits=[0.1, max_col],
            # list(...) so that tuples / arrays of breaks are accepted too
            breaks=list(col_breaks) + [max_col],
        )
        + plotnine.coord_fixed()
        + plotnine.theme_bw()
        + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        )
        + plotnine.facet_wrap("~cluster", ncol=n_columns)
        + plotnine.ggtitle("nUMI from each cell type")
        + plotnine.theme(figure_size=figure_size)
    )

    return ax
)

# Add back label column
# (copied from the un-encoded data so points can be coloured by group)
normalized_all_data_UMAPencoded_df["sample group"] = normalized_all_data[
    "sample group"]

# Plot
# Scatter of the two embedding columns, named "1" and "2", labelled UMAP 1 / UMAP 2.
fig = pn.ggplot(normalized_all_data_UMAPencoded_df, pn.aes(x="1", y="2"))
fig += pn.geom_point(pn.aes(color="sample group"), alpha=0.4)
fig += pn.labs(x="UMAP 1", y="UMAP 2", title="Gene expression data in gene space")
fig += pn.theme_bw()
fig += pn.theme(
    legend_title_align="center",
    plot_background=pn.element_rect(fill="white"),
    legend_key=pn.element_rect(fill="white", colour="white"),
    legend_title=pn.element_text(family="sans-serif", size=15),
    legend_text=pn.element_text(family="sans-serif", size=12),
    plot_title=pn.element_text(family="sans-serif", size=15),
    axis_text=pn.element_text(family="sans-serif", size=12),
    axis_title=pn.element_text(family="sans-serif", size=15),
)
# One manual colour per sample group (three colours are supplied).
fig += pn.scale_color_manual(["#bdbdbd", "red", "blue"])
# Points are drawn with alpha=0.4; show the legend keys fully opaque instead.
fig += pn.guides(colour=pn.guide_legend(override_aes={"alpha": 1}))
# NOTE(review): restricts the x axis to [9, 10]; points outside that window
# will presumably not be shown - confirm this zoom is intentional.
fig += pn.scales.xlim(9, 10)

print(fig)
# -

# Based on a UMAP of the normalized gene expression data, it looks like there isn't a clear separation between WT and mutant samples, though there are only 2 samples per group so this type of clustering observation is limited.
color ='darkgrey', size=0.5) \ + geom_errorbar(all_svcca[all_svcca['Group'] == 'uncorrected'], aes(x=lst_num_experiments, ymin='ymin', ymax='ymax'), color='darkgrey') \ + geom_line(threshold, aes(x=lst_num_experiments, y='score'), linetype='dashed', size=1, color="darkgrey", show_legend=False) \ + labs(x = "Number of Partitions", y = "Similarity score (SVCCA)", title = "Similarity across varying numbers of partitions") \ + theme(plot_title=element_text(weight='bold'), plot_background=element_rect(fill="white"), panel_background=element_rect(fill="white"), panel_grid_major_x=element_line(color="lightgrey"), panel_grid_major_y=element_line(color="lightgrey"), axis_line=element_line(color="grey"), legend_key=element_rect(fill='white', colour='white') ) \ + scale_color_manual(['#b3e5fc']) \ print(g) ggsave(plot=g, filename=svcca_uncorrected_file, dpi=300) # In[9]: # Plot - black lst_num_experiments = list(all_svcca.index[0:int(len(all_svcca.index) / 2)])
# Make sure the figure directory exists before anything is saved into it.
os.makedirs(output_figuresdir, exist_ok=True)

# Save the all-sites cell-quality figure (`cell_count_gg` is built earlier).
output_file = pathlib.Path(
    output_figuresdir, "all_cellpainting_cellquality_across_sites.png"
)
# `check_if_write` is defined elsewhere; presumably it decides whether the
# file may be (over)written given `force` - confirm against its definition.
if check_if_write(output_file, force, throw_warning=True):
    cell_count_gg.save(output_file, dpi=300, width=10, height=7, verbose=False)

# Same graph as above, separated by well.
cell_count_gg_parsed = (
    # Bars of cell_count per site, filled by the Cell_Quality category.
    gg.ggplot(cell_count_df, gg.aes(x="site", y="cell_count"))
    + gg.geom_bar(gg.aes(fill="Cell_Quality"), stat="identity")
    + gg.theme_bw()
    + gg.theme(
        # Small, rotated tick labels on the x axis.
        axis_text_x=gg.element_text(rotation=90, size=5),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
    )
    + gg.xlab("Sites")
    + gg.ylab("Cell Count")
    + gg.scale_fill_manual(
        name="Cell Quality", labels=cell_category_order, values=cell_category_colors
    )
    # One panel per well, each panel with its own x axis.
    + gg.facet_wrap("~well", drop=False, scales="free_x")
)

# Save the per-well version under its own file name, with the same write check.
output_file = pathlib.Path(
    output_figuresdir, "all_cellpainting_cellquality_across_sites_by_well.png"
)
if check_if_write(output_file, force, throw_warning=True):
    cell_count_gg_parsed.save(output_file, dpi=300, width=10, height=7, verbose=False)
color ='darkgrey', size=0.5) \ + geom_errorbar(all_svcca, aes(x=lst_num_partitions, ymin='ymin', ymax='ymax'), color='darkgrey') \ + geom_line(threshold, aes(x=lst_num_partitions, y='score'), linetype='dashed', size=1, color="darkgrey", show_legend=False) \ + labs(x = "Number of Partitions", y = "Similarity score (SVCCA)", title = "Similarity across varying numbers of partitions") \ + theme(plot_title=element_text(weight='bold'), plot_background=element_rect(fill="white"), panel_background=element_rect(fill="white"), panel_grid_major_x=element_line(color="lightgrey"), panel_grid_major_y=element_line(color="lightgrey"), axis_line=element_line(color="grey"), legend_key=element_rect(fill='white', colour='white') ) \ + scale_color_manual(['#1976d2', '#b3e5fc']) \ print(panel_A) ggsave(plot=panel_A, filename=svcca_file, device="svg", dpi=300) ggsave(plot=panel_A, filename=svcca_png_file, device="svg", dpi=300) # ## Uncorrected PCA panel # In[9]: